github.com/symblcrowd/bloom@v2.0.5+incompatible/bloom_test.go (about) 1 package bloom 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "encoding/gob" 7 "encoding/json" 8 "math" 9 "testing" 10 ) 11 12 // This implementation of Bloom filters is _not_ 13 // safe for concurrent use. Uncomment the following 14 // method and run go test -race 15 // 16 // func TestConcurrent(t *testing.T) { 17 // gmp := runtime.GOMAXPROCS(2) 18 // defer runtime.GOMAXPROCS(gmp) 19 // 20 // f := New(1000, 4) 21 // n1 := []byte("Bess") 22 // n2 := []byte("Jane") 23 // f.Add(n1) 24 // f.Add(n2) 25 // 26 // var wg sync.WaitGroup 27 // const try = 1000 28 // var err1, err2 error 29 // 30 // wg.Add(1) 31 // go func() { 32 // for i := 0; i < try; i++ { 33 // n1b := f.Test(n1) 34 // if !n1b { 35 // err1 = fmt.Errorf("%v should be in", n1) 36 // break 37 // } 38 // } 39 // wg.Done() 40 // }() 41 // 42 // wg.Add(1) 43 // go func() { 44 // for i := 0; i < try; i++ { 45 // n2b := f.Test(n2) 46 // if !n2b { 47 // err2 = fmt.Errorf("%v should be in", n2) 48 // break 49 // } 50 // } 51 // wg.Done() 52 // }() 53 // 54 // wg.Wait() 55 // 56 // if err1 != nil { 57 // t.Fatal(err1) 58 // } 59 // if err2 != nil { 60 // t.Fatal(err2) 61 // } 62 // } 63 64 func TestBasic(t *testing.T) { 65 f := New(1000, 4) 66 n1 := []byte("Bess") 67 n2 := []byte("Jane") 68 n3 := []byte("Emma") 69 f.Add(n1) 70 n3a := f.TestAndAdd(n3) 71 n1b := f.Test(n1) 72 n2b := f.Test(n2) 73 n3b := f.Test(n3) 74 if !n1b { 75 t.Errorf("%v should be in.", n1) 76 } 77 if n2b { 78 t.Errorf("%v should not be in.", n2) 79 } 80 if n3a { 81 t.Errorf("%v should not be in the first time we look.", n3) 82 } 83 if !n3b { 84 t.Errorf("%v should be in the second time we look.", n3) 85 } 86 } 87 88 func TestBasicUint32(t *testing.T) { 89 f := New(1000, 4) 90 n1 := make([]byte, 4) 91 n2 := make([]byte, 4) 92 n3 := make([]byte, 4) 93 n4 := make([]byte, 4) 94 binary.BigEndian.PutUint32(n1, 100) 95 binary.BigEndian.PutUint32(n2, 101) 96 binary.BigEndian.PutUint32(n3, 102) 97 binary.BigEndian.PutUint32(n4, 103) 98 f.Add(n1) 99 n3a := f.TestAndAdd(n3) 100 n1b := f.Test(n1) 101 n2b := f.Test(n2) 102 n3b := f.Test(n3) 103 f.Test(n4) 104 if !n1b { 105 t.Errorf("%v should be in.", n1) 106 } 107 if n2b { 108 t.Errorf("%v should not be in.", n2) 109 } 110 if n3a { 111 t.Errorf("%v should not be in the first time we look.", n3) 112 } 113 if !n3b { 114 t.Errorf("%v should be in the second time we look.", n3) 115 } 116 } 117 118 func TestNewWithLowNumbers(t *testing.T) { 119 f := New(0, 0) 120 if f.k != 1 { 121 t.Errorf("%v should be 1", f.k) 122 } 123 if f.m != 1 { 124 t.Errorf("%v should be 1", f.m) 125 } 126 } 127 128 func TestString(t *testing.T) { 129 f := NewWithEstimates(1000, 0.001) 130 n1 := "Love" 131 n2 := "is" 132 n3 := "in" 133 n4 := "bloom" 134 f.AddString(n1) 135 n3a := f.TestAndAddString(n3) 136 n1b := f.TestString(n1) 137 n2b := f.TestString(n2) 138 n3b := f.TestString(n3) 139 f.TestString(n4) 140 if !n1b { 141 t.Errorf("%v should be in.", n1) 142 } 143 if n2b { 144 t.Errorf("%v should not be in.", n2) 145 } 146 if n3a { 147 t.Errorf("%v should not be in the first time we look.", n3) 148 } 149 if !n3b { 150 t.Errorf("%v should be in the second time we look.", n3) 151 } 152 153 } 154 155 func testEstimated(n uint, maxFp float64, t *testing.T) { 156 m, k := EstimateParameters(n, maxFp) 157 f := NewWithEstimates(n, maxFp) 158 fpRate := f.EstimateFalsePositiveRate(n) 159 if fpRate > 1.5*maxFp { 160 t.Errorf("False positive rate too high: n: %v; m: %v; k: %v; maxFp: %f; fpRate: %f, fpRate/maxFp: %f", n, m, k, maxFp, fpRate, fpRate/maxFp) 161 } 162 } 163 164 func TestEstimated1000_0001(t *testing.T) { testEstimated(1000, 0.000100, t) } 165 func TestEstimated10000_0001(t *testing.T) { testEstimated(10000, 0.000100, t) } 166 func TestEstimated100000_0001(t *testing.T) { testEstimated(100000, 0.000100, t) } 167 168 func TestEstimated1000_001(t *testing.T) { testEstimated(1000, 0.001000, t) } 169 func TestEstimated10000_001(t *testing.T) { testEstimated(10000, 0.001000, t) } 170 func TestEstimated100000_001(t *testing.T) { testEstimated(100000, 0.001000, t) } 171 172 func TestEstimated1000_01(t *testing.T) { testEstimated(1000, 0.010000, t) } 173 func TestEstimated10000_01(t *testing.T) { testEstimated(10000, 0.010000, t) } 174 func TestEstimated100000_01(t *testing.T) { testEstimated(100000, 0.010000, t) } 175 176 func min(a, b uint) uint { 177 if a < b { 178 return a 179 } 180 return b 181 } 182 183 // The following function courtesy of Nick @turgon 184 // This helper function ranges over the input data, applying the hashing 185 // which returns the bit locations to set in the filter. 186 // For each location, increment a counter for that bit address. 187 // 188 // If the Bloom Filter's location() method distributes locations uniformly 189 // at random, a property it should inherit from its hash function, then 190 // each bit location in the filter should end up with roughly the same 191 // number of hits. Importantly, the value of k should not matter. 192 // 193 // Once the results are collected, we can run a chi squared goodness of fit 194 // test, comparing the result histogram with the uniform distribition. 195 // This yields a test statistic with degrees-of-freedom of m-1. 196 func chiTestBloom(m, k, rounds uint, elements [][]byte) (succeeds bool) { 197 f := New(m, k) 198 results := make([]uint, m) 199 chi := make([]float64, m) 200 201 for _, data := range elements { 202 h := baseHashes(data) 203 for i := uint(0); i < f.k; i++ { 204 results[f.location(h, i)]++ 205 } 206 } 207 208 // Each element of results should contain the same value: k * rounds / m. 209 // Let's run a chi-square goodness of fit and see how it fares. 210 var chiStatistic float64 211 e := float64(k*rounds) / float64(m) 212 for i := uint(0); i < m; i++ { 213 chi[i] = math.Pow(float64(results[i])-e, 2.0) / e 214 chiStatistic += chi[i] 215 } 216 217 // this tests at significant level 0.005 up to 20 degrees of freedom 218 table := [20]float64{ 219 7.879, 10.597, 12.838, 14.86, 16.75, 18.548, 20.278, 220 21.955, 23.589, 25.188, 26.757, 28.3, 29.819, 31.319, 32.801, 34.267, 221 35.718, 37.156, 38.582, 39.997} 222 df := min(m-1, 20) 223 224 succeeds = table[df-1] > chiStatistic 225 return 226 227 } 228 229 func TestLocation(t *testing.T) { 230 var m, k, rounds uint 231 232 m = 8 233 k = 3 234 235 rounds = 100000 // 15000000 236 237 elements := make([][]byte, rounds) 238 239 for x := uint(0); x < rounds; x++ { 240 ctrlist := make([]uint8, 4) 241 ctrlist[0] = uint8(x) 242 ctrlist[1] = uint8(x >> 8) 243 ctrlist[2] = uint8(x >> 16) 244 ctrlist[3] = uint8(x >> 24) 245 data := []byte(ctrlist) 246 elements[x] = data 247 } 248 249 succeeds := chiTestBloom(m, k, rounds, elements) 250 if !succeeds { 251 t.Error("random assignment is too unrandom") 252 } 253 254 } 255 256 func TestCap(t *testing.T) { 257 f := New(1000, 4) 258 if f.Cap() != f.m { 259 t.Error("not accessing Cap() correctly") 260 } 261 } 262 263 func TestK(t *testing.T) { 264 f := New(1000, 4) 265 if f.K() != f.k { 266 t.Error("not accessing K() correctly") 267 } 268 } 269 270 func TestMarshalUnmarshalJSON(t *testing.T) { 271 f := New(1000, 4) 272 data, err := json.Marshal(f) 273 if err != nil { 274 t.Fatal(err.Error()) 275 } 276 277 var g BloomFilter 278 err = json.Unmarshal(data, &g) 279 if err != nil { 280 t.Fatal(err.Error()) 281 } 282 if g.m != f.m { 283 t.Error("invalid m value") 284 } 285 if g.k != f.k { 286 t.Error("invalid k value") 287 } 288 if g.b == nil { 289 t.Fatal("bitset is nil") 290 } 291 if !g.b.Equal(f.b) { 292 t.Error("bitsets are not equal") 293 } 294 } 295 296 func TestUnmarshalInvalidJSON(t *testing.T) { 297 data := []byte("{invalid}") 298 299 var g BloomFilter 300 err := g.UnmarshalJSON(data) 301 if err == nil { 302 t.Error("expected error while unmarshalling invalid data") 303 } 304 } 305 306 func TestWriteToReadFrom(t *testing.T) { 307 var b bytes.Buffer 308 f := New(1000, 4) 309 _, err := f.WriteTo(&b) 310 if err != nil { 311 t.Fatal(err) 312 } 313 314 g := New(1000, 1) 315 _, err = g.ReadFrom(&b) 316 if err != nil { 317 t.Fatal(err) 318 } 319 if g.m != f.m { 320 t.Error("invalid m value") 321 } 322 if g.k != f.k { 323 t.Error("invalid k value") 324 } 325 if g.b == nil { 326 t.Fatal("bitset is nil") 327 } 328 if !g.b.Equal(f.b) { 329 t.Error("bitsets are not equal") 330 } 331 332 g.Test([]byte("")) 333 } 334 335 func TestReadWriteBinary(t *testing.T) { 336 f := New(1000, 4) 337 var buf bytes.Buffer 338 bytesWritten, err := f.WriteTo(&buf) 339 if err != nil { 340 t.Fatal(err.Error()) 341 } 342 if bytesWritten != int64(buf.Len()) { 343 t.Errorf("incorrect write length %d != %d", bytesWritten, buf.Len()) 344 } 345 346 var g BloomFilter 347 bytesRead, err := g.ReadFrom(&buf) 348 if err != nil { 349 t.Fatal(err.Error()) 350 } 351 if bytesRead != bytesWritten { 352 t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten) 353 } 354 if g.m != f.m { 355 t.Error("invalid m value") 356 } 357 if g.k != f.k { 358 t.Error("invalid k value") 359 } 360 if g.b == nil { 361 t.Fatal("bitset is nil") 362 } 363 if !g.b.Equal(f.b) { 364 t.Error("bitsets are not equal") 365 } 366 } 367 368 func TestEncodeDecodeGob(t *testing.T) { 369 f := New(1000, 4) 370 f.Add([]byte("one")) 371 f.Add([]byte("two")) 372 f.Add([]byte("three")) 373 var buf bytes.Buffer 374 err := gob.NewEncoder(&buf).Encode(f) 375 if err != nil { 376 t.Fatal(err.Error()) 377 } 378 379 var g BloomFilter 380 err = gob.NewDecoder(&buf).Decode(&g) 381 if err != nil { 382 t.Fatal(err.Error()) 383 } 384 if g.m != f.m { 385 t.Error("invalid m value") 386 } 387 if g.k != f.k { 388 t.Error("invalid k value") 389 } 390 if g.b == nil { 391 t.Fatal("bitset is nil") 392 } 393 if !g.b.Equal(f.b) { 394 t.Error("bitsets are not equal") 395 } 396 if !g.Test([]byte("three")) { 397 t.Errorf("missing value 'three'") 398 } 399 if !g.Test([]byte("two")) { 400 t.Errorf("missing value 'two'") 401 } 402 if !g.Test([]byte("one")) { 403 t.Errorf("missing value 'one'") 404 } 405 } 406 407 func TestEqual(t *testing.T) { 408 f := New(1000, 4) 409 f1 := New(1000, 4) 410 g := New(1000, 20) 411 h := New(10, 20) 412 n1 := []byte("Bess") 413 f1.Add(n1) 414 if !f.Equal(f) { 415 t.Errorf("%v should be equal to itself", f) 416 } 417 if f.Equal(f1) { 418 t.Errorf("%v should not be equal to %v", f, f1) 419 } 420 if f.Equal(g) { 421 t.Errorf("%v should not be equal to %v", f, g) 422 } 423 if f.Equal(h) { 424 t.Errorf("%v should not be equal to %v", f, h) 425 } 426 } 427 428 func BenchmarkEstimated(b *testing.B) { 429 for n := uint(100000); n <= 100000; n *= 10 { 430 for fp := 0.1; fp >= 0.0001; fp /= 10.0 { 431 f := NewWithEstimates(n, fp) 432 f.EstimateFalsePositiveRate(n) 433 } 434 } 435 } 436 437 func BenchmarkSeparateTestAndAdd(b *testing.B) { 438 f := NewWithEstimates(uint(b.N), 0.0001) 439 key := make([]byte, 100) 440 b.ResetTimer() 441 for i := 0; i < b.N; i++ { 442 binary.BigEndian.PutUint32(key, uint32(i)) 443 f.Test(key) 444 f.Add(key) 445 } 446 } 447 448 func BenchmarkCombinedTestAndAdd(b *testing.B) { 449 f := NewWithEstimates(uint(b.N), 0.0001) 450 key := make([]byte, 100) 451 b.ResetTimer() 452 for i := 0; i < b.N; i++ { 453 binary.BigEndian.PutUint32(key, uint32(i)) 454 f.TestAndAdd(key) 455 } 456 } 457 458 func TestMerge(t *testing.T) { 459 f := New(1000, 4) 460 n1 := []byte("f") 461 f.Add(n1) 462 463 g := New(1000, 4) 464 n2 := []byte("g") 465 g.Add(n2) 466 467 h := New(999, 4) 468 n3 := []byte("h") 469 h.Add(n3) 470 471 j := New(1000, 5) 472 n4 := []byte("j") 473 j.Add(n4) 474 475 err := f.Merge(g) 476 if err != nil { 477 t.Errorf("There should be no error when merging two similar filters") 478 } 479 480 err = f.Merge(h) 481 if err == nil { 482 t.Errorf("There should be an error when merging filters with mismatched m") 483 } 484 485 err = f.Merge(j) 486 if err == nil { 487 t.Errorf("There should be an error when merging filters with mismatched k") 488 } 489 490 n2b := f.Test(n2) 491 if !n2b { 492 t.Errorf("The value doesn't exist after a valid merge") 493 } 494 495 n3b := f.Test(n3) 496 if n3b { 497 t.Errorf("The value exists after an invalid merge") 498 } 499 500 n4b := f.Test(n4) 501 if n4b { 502 t.Errorf("The value exists after an invalid merge") 503 } 504 } 505 506 func TestCopy(t *testing.T) { 507 f := New(1000, 4) 508 n1 := []byte("f") 509 f.Add(n1) 510 511 // copy here instead of New 512 g := f.Copy() 513 n2 := []byte("g") 514 g.Add(n2) 515 516 n1fb := f.Test(n1) 517 if !n1fb { 518 t.Errorf("The value doesn't exist in original after making a copy") 519 } 520 521 n1gb := g.Test(n1) 522 if !n1gb { 523 t.Errorf("The value doesn't exist in the copy") 524 } 525 526 n2fb := f.Test(n2) 527 if n2fb { 528 t.Errorf("The value exists in the original, it should only exist in copy") 529 } 530 531 n2gb := g.Test(n2) 532 if !n2gb { 533 t.Errorf("The value doesn't exist in copy after Add()") 534 } 535 } 536 537 func TestFrom(t *testing.T) { 538 var ( 539 k = uint(5) 540 data = make([]uint64, 10) 541 test = []byte("test") 542 ) 543 544 bf := From(data, k) 545 if bf.K() != k { 546 t.Errorf("Constant k does not match the expected value") 547 } 548 549 if bf.Cap() != uint(len(data)*64) { 550 t.Errorf("Capacity does not match the expected value") 551 } 552 553 if bf.Test(test) { 554 t.Errorf("Bloom filter should not contain the value") 555 } 556 557 bf.Add(test) 558 if !bf.Test(test) { 559 t.Errorf("Bloom filter should contain the value") 560 } 561 562 // create a new Bloom filter from an existing (populated) data slice. 563 bf = From(data, k) 564 if !bf.Test(test) { 565 t.Errorf("Bloom filter should contain the value") 566 } 567 } 568 569 func TestTestLocations(t *testing.T) { 570 f := NewWithEstimates(1000, 0.001) 571 n1 := []byte("Love") 572 n2 := []byte("is") 573 n3 := []byte("in") 574 n4 := []byte("bloom") 575 f.Add(n1) 576 n3a := f.TestLocations(Locations(n3, f.K())) 577 f.Add(n3) 578 n1b := f.TestLocations(Locations(n1, f.K())) 579 n2b := f.TestLocations(Locations(n2, f.K())) 580 n3b := f.TestLocations(Locations(n3, f.K())) 581 n4b := f.TestLocations(Locations(n4, f.K())) 582 if !n1b { 583 t.Errorf("%v should be in.", n1) 584 } 585 if n2b { 586 t.Errorf("%v should not be in.", n2) 587 } 588 if n3a { 589 t.Errorf("%v should not be in the first time we look.", n3) 590 } 591 if !n3b { 592 t.Errorf("%v should be in the second time we look.", n3) 593 } 594 if n4b { 595 t.Errorf("%v should be in.", n4) 596 } 597 }