github.com/bits-and-blooms/bloom/v3@v3.5.0/bloom_test.go (about) 1 package bloom 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "encoding/gob" 7 "encoding/json" 8 "math" 9 "testing" 10 ) 11 12 // This implementation of Bloom filters is _not_ 13 // safe for concurrent use. Uncomment the following 14 // method and run go test -race 15 // 16 // func TestConcurrent(t *testing.T) { 17 // gmp := runtime.GOMAXPROCS(2) 18 // defer runtime.GOMAXPROCS(gmp) 19 // 20 // f := New(1000, 4) 21 // n1 := []byte("Bess") 22 // n2 := []byte("Jane") 23 // f.Add(n1) 24 // f.Add(n2) 25 // 26 // var wg sync.WaitGroup 27 // const try = 1000 28 // var err1, err2 error 29 // 30 // wg.Add(1) 31 // go func() { 32 // for i := 0; i < try; i++ { 33 // n1b := f.Test(n1) 34 // if !n1b { 35 // err1 = fmt.Errorf("%v should be in", n1) 36 // break 37 // } 38 // } 39 // wg.Done() 40 // }() 41 // 42 // wg.Add(1) 43 // go func() { 44 // for i := 0; i < try; i++ { 45 // n2b := f.Test(n2) 46 // if !n2b { 47 // err2 = fmt.Errorf("%v should be in", n2) 48 // break 49 // } 50 // } 51 // wg.Done() 52 // }() 53 // 54 // wg.Wait() 55 // 56 // if err1 != nil { 57 // t.Fatal(err1) 58 // } 59 // if err2 != nil { 60 // t.Fatal(err2) 61 // } 62 // } 63 64 func TestBasic(t *testing.T) { 65 f := New(1000, 4) 66 n1 := []byte("Bess") 67 n2 := []byte("Jane") 68 n3 := []byte("Emma") 69 f.Add(n1) 70 n3a := f.TestAndAdd(n3) 71 n1b := f.Test(n1) 72 n2b := f.Test(n2) 73 n3b := f.Test(n3) 74 if !n1b { 75 t.Errorf("%v should be in.", n1) 76 } 77 if n2b { 78 t.Errorf("%v should not be in.", n2) 79 } 80 if n3a { 81 t.Errorf("%v should not be in the first time we look.", n3) 82 } 83 if !n3b { 84 t.Errorf("%v should be in the second time we look.", n3) 85 } 86 } 87 88 func TestBasicUint32(t *testing.T) { 89 f := New(1000, 4) 90 n1 := make([]byte, 4) 91 n2 := make([]byte, 4) 92 n3 := make([]byte, 4) 93 n4 := make([]byte, 4) 94 n5 := make([]byte, 4) 95 binary.BigEndian.PutUint32(n1, 100) 96 binary.BigEndian.PutUint32(n2, 101) 97 binary.BigEndian.PutUint32(n3, 102) 98 binary.BigEndian.PutUint32(n4, 103) 99 binary.BigEndian.PutUint32(n5, 104) 100 f.Add(n1) 101 n3a := f.TestAndAdd(n3) 102 n1b := f.Test(n1) 103 n2b := f.Test(n2) 104 n3b := f.Test(n3) 105 n5a := f.TestOrAdd(n5) 106 n5b := f.Test(n5) 107 f.Test(n4) 108 if !n1b { 109 t.Errorf("%v should be in.", n1) 110 } 111 if n2b { 112 t.Errorf("%v should not be in.", n2) 113 } 114 if n3a { 115 t.Errorf("%v should not be in the first time we look.", n3) 116 } 117 if !n3b { 118 t.Errorf("%v should be in the second time we look.", n3) 119 } 120 if n5a { 121 t.Errorf("%v should not be in the first time we look.", n5) 122 } 123 if !n5b { 124 t.Errorf("%v should be in the second time we look.", n5) 125 } 126 } 127 128 func TestNewWithLowNumbers(t *testing.T) { 129 f := New(0, 0) 130 if f.k != 1 { 131 t.Errorf("%v should be 1", f.k) 132 } 133 if f.m != 1 { 134 t.Errorf("%v should be 1", f.m) 135 } 136 } 137 138 func TestString(t *testing.T) { 139 f := NewWithEstimates(1000, 0.001) 140 n1 := "Love" 141 n2 := "is" 142 n3 := "in" 143 n4 := "bloom" 144 n5 := "blooms" 145 f.AddString(n1) 146 n3a := f.TestAndAddString(n3) 147 n1b := f.TestString(n1) 148 n2b := f.TestString(n2) 149 n3b := f.TestString(n3) 150 n5a := f.TestOrAddString(n5) 151 n5b := f.TestString(n5) 152 f.TestString(n4) 153 if !n1b { 154 t.Errorf("%v should be in.", n1) 155 } 156 if n2b { 157 t.Errorf("%v should not be in.", n2) 158 } 159 if n3a { 160 t.Errorf("%v should not be in the first time we look.", n3) 161 } 162 if !n3b { 163 t.Errorf("%v should be in the second time we look.", n3) 164 } 165 if n5a { 166 t.Errorf("%v should not be in the first time we look.", n5) 167 } 168 if !n5b { 169 t.Errorf("%v should be in the second time we look.", n5) 170 } 171 172 } 173 174 func testEstimated(n uint, maxFp float64, t *testing.T) { 175 m, k := EstimateParameters(n, maxFp) 176 fpRate := EstimateFalsePositiveRate(m, k, n) 177 if fpRate > 1.5*maxFp { 178 t.Errorf("False positive rate too high: n: %v; m: %v; k: %v; maxFp: %f; fpRate: %f, fpRate/maxFp: %f", n, m, k, maxFp, fpRate, fpRate/maxFp) 179 } 180 } 181 182 func TestEstimated1000_0001(t *testing.T) { testEstimated(1000, 0.000100, t) } 183 func TestEstimated10000_0001(t *testing.T) { testEstimated(10000, 0.000100, t) } 184 func TestEstimated100000_0001(t *testing.T) { testEstimated(100000, 0.000100, t) } 185 186 func TestEstimated1000_001(t *testing.T) { testEstimated(1000, 0.001000, t) } 187 func TestEstimated10000_001(t *testing.T) { testEstimated(10000, 0.001000, t) } 188 func TestEstimated100000_001(t *testing.T) { testEstimated(100000, 0.001000, t) } 189 190 func TestEstimated1000_01(t *testing.T) { testEstimated(1000, 0.010000, t) } 191 func TestEstimated10000_01(t *testing.T) { testEstimated(10000, 0.010000, t) } 192 func TestEstimated100000_01(t *testing.T) { testEstimated(100000, 0.010000, t) } 193 194 func min(a, b uint) uint { 195 if a < b { 196 return a 197 } 198 return b 199 } 200 201 // The following function courtesy of Nick @turgon 202 // This helper function ranges over the input data, applying the hashing 203 // which returns the bit locations to set in the filter. 204 // For each location, increment a counter for that bit address. 205 // 206 // If the Bloom Filter's location() method distributes locations uniformly 207 // at random, a property it should inherit from its hash function, then 208 // each bit location in the filter should end up with roughly the same 209 // number of hits. Importantly, the value of k should not matter. 210 // 211 // Once the results are collected, we can run a chi squared goodness of fit 212 // test, comparing the result histogram with the uniform distribition. 213 // This yields a test statistic with degrees-of-freedom of m-1. 214 func chiTestBloom(m, k, rounds uint, elements [][]byte) (succeeds bool) { 215 f := New(m, k) 216 results := make([]uint, m) 217 chi := make([]float64, m) 218 219 for _, data := range elements { 220 h := baseHashes(data) 221 for i := uint(0); i < f.k; i++ { 222 results[f.location(h, i)]++ 223 } 224 } 225 226 // Each element of results should contain the same value: k * rounds / m. 227 // Let's run a chi-square goodness of fit and see how it fares. 228 var chiStatistic float64 229 e := float64(k*rounds) / float64(m) 230 for i := uint(0); i < m; i++ { 231 chi[i] = math.Pow(float64(results[i])-e, 2.0) / e 232 chiStatistic += chi[i] 233 } 234 235 // this tests at significant level 0.005 up to 20 degrees of freedom 236 table := [20]float64{ 237 7.879, 10.597, 12.838, 14.86, 16.75, 18.548, 20.278, 238 21.955, 23.589, 25.188, 26.757, 28.3, 29.819, 31.319, 32.801, 34.267, 239 35.718, 37.156, 38.582, 39.997} 240 df := min(m-1, 20) 241 242 succeeds = table[df-1] > chiStatistic 243 return 244 245 } 246 247 func TestLocation(t *testing.T) { 248 var m, k, rounds uint 249 250 m = 8 251 k = 3 252 253 rounds = 100000 // 15000000 254 255 elements := make([][]byte, rounds) 256 257 for x := uint(0); x < rounds; x++ { 258 ctrlist := make([]uint8, 4) 259 ctrlist[0] = uint8(x) 260 ctrlist[1] = uint8(x >> 8) 261 ctrlist[2] = uint8(x >> 16) 262 ctrlist[3] = uint8(x >> 24) 263 data := []byte(ctrlist) 264 elements[x] = data 265 } 266 267 succeeds := chiTestBloom(m, k, rounds, elements) 268 if !succeeds { 269 t.Error("random assignment is too unrandom") 270 } 271 272 } 273 274 func TestCap(t *testing.T) { 275 f := New(1000, 4) 276 if f.Cap() != f.m { 277 t.Error("not accessing Cap() correctly") 278 } 279 } 280 281 func TestK(t *testing.T) { 282 f := New(1000, 4) 283 if f.K() != f.k { 284 t.Error("not accessing K() correctly") 285 } 286 } 287 288 func TestMarshalUnmarshalJSON(t *testing.T) { 289 f := New(1000, 4) 290 data, err := json.Marshal(f) 291 if err != nil { 292 t.Fatal(err.Error()) 293 } 294 295 var g BloomFilter 296 err = json.Unmarshal(data, &g) 297 if err != nil { 298 t.Fatal(err.Error()) 299 } 300 if g.m != f.m { 301 t.Error("invalid m value") 302 } 303 if g.k != f.k { 304 t.Error("invalid k value") 305 } 306 if g.b == nil { 307 t.Fatal("bitset is nil") 308 } 309 if !g.b.Equal(f.b) { 310 t.Error("bitsets are not equal") 311 } 312 } 313 314 func TestUnmarshalInvalidJSON(t *testing.T) { 315 data := []byte("{invalid}") 316 317 var g BloomFilter 318 err := g.UnmarshalJSON(data) 319 if err == nil { 320 t.Error("expected error while unmarshalling invalid data") 321 } 322 } 323 324 func TestWriteToReadFrom(t *testing.T) { 325 var b bytes.Buffer 326 f := New(1000, 4) 327 _, err := f.WriteTo(&b) 328 if err != nil { 329 t.Fatal(err) 330 } 331 332 g := New(1000, 1) 333 _, err = g.ReadFrom(&b) 334 if err != nil { 335 t.Fatal(err) 336 } 337 if g.m != f.m { 338 t.Error("invalid m value") 339 } 340 if g.k != f.k { 341 t.Error("invalid k value") 342 } 343 if g.b == nil { 344 t.Fatal("bitset is nil") 345 } 346 if !g.b.Equal(f.b) { 347 t.Error("bitsets are not equal") 348 } 349 350 g.Test([]byte("")) 351 } 352 353 func TestReadWriteBinary(t *testing.T) { 354 f := New(1000, 4) 355 var buf bytes.Buffer 356 bytesWritten, err := f.WriteTo(&buf) 357 if err != nil { 358 t.Fatal(err.Error()) 359 } 360 if bytesWritten != int64(buf.Len()) { 361 t.Errorf("incorrect write length %d != %d", bytesWritten, buf.Len()) 362 } 363 364 var g BloomFilter 365 bytesRead, err := g.ReadFrom(&buf) 366 if err != nil { 367 t.Fatal(err.Error()) 368 } 369 if bytesRead != bytesWritten { 370 t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten) 371 } 372 if g.m != f.m { 373 t.Error("invalid m value") 374 } 375 if g.k != f.k { 376 t.Error("invalid k value") 377 } 378 if g.b == nil { 379 t.Fatal("bitset is nil") 380 } 381 if !g.b.Equal(f.b) { 382 t.Error("bitsets are not equal") 383 } 384 } 385 386 func TestEncodeDecodeGob(t *testing.T) { 387 f := New(1000, 4) 388 f.Add([]byte("one")) 389 f.Add([]byte("two")) 390 f.Add([]byte("three")) 391 var buf bytes.Buffer 392 err := gob.NewEncoder(&buf).Encode(f) 393 if err != nil { 394 t.Fatal(err.Error()) 395 } 396 397 var g BloomFilter 398 err = gob.NewDecoder(&buf).Decode(&g) 399 if err != nil { 400 t.Fatal(err.Error()) 401 } 402 if g.m != f.m { 403 t.Error("invalid m value") 404 } 405 if g.k != f.k { 406 t.Error("invalid k value") 407 } 408 if g.b == nil { 409 t.Fatal("bitset is nil") 410 } 411 if !g.b.Equal(f.b) { 412 t.Error("bitsets are not equal") 413 } 414 if !g.Test([]byte("three")) { 415 t.Errorf("missing value 'three'") 416 } 417 if !g.Test([]byte("two")) { 418 t.Errorf("missing value 'two'") 419 } 420 if !g.Test([]byte("one")) { 421 t.Errorf("missing value 'one'") 422 } 423 } 424 425 func TestEqual(t *testing.T) { 426 f := New(1000, 4) 427 f1 := New(1000, 4) 428 g := New(1000, 20) 429 h := New(10, 20) 430 n1 := []byte("Bess") 431 f1.Add(n1) 432 if !f.Equal(f) { 433 t.Errorf("%v should be equal to itself", f) 434 } 435 if f.Equal(f1) { 436 t.Errorf("%v should not be equal to %v", f, f1) 437 } 438 if f.Equal(g) { 439 t.Errorf("%v should not be equal to %v", f, g) 440 } 441 if f.Equal(h) { 442 t.Errorf("%v should not be equal to %v", f, h) 443 } 444 } 445 446 func BenchmarkEstimated(b *testing.B) { 447 for n := uint(100000); n <= 100000; n *= 10 { 448 for fp := 0.1; fp >= 0.0001; fp /= 10.0 { 449 f := NewWithEstimates(n, fp) 450 EstimateFalsePositiveRate(f.m, f.k, n) 451 } 452 } 453 } 454 455 func BenchmarkSeparateTestAndAdd(b *testing.B) { 456 f := NewWithEstimates(uint(b.N), 0.0001) 457 key := make([]byte, 100) 458 b.ResetTimer() 459 for i := 0; i < b.N; i++ { 460 binary.BigEndian.PutUint32(key, uint32(i)) 461 f.Test(key) 462 f.Add(key) 463 } 464 } 465 466 func BenchmarkCombinedTestAndAdd(b *testing.B) { 467 f := NewWithEstimates(uint(b.N), 0.0001) 468 key := make([]byte, 100) 469 b.ResetTimer() 470 for i := 0; i < b.N; i++ { 471 binary.BigEndian.PutUint32(key, uint32(i)) 472 f.TestAndAdd(key) 473 } 474 } 475 476 func TestMerge(t *testing.T) { 477 f := New(1000, 4) 478 n1 := []byte("f") 479 f.Add(n1) 480 481 g := New(1000, 4) 482 n2 := []byte("g") 483 g.Add(n2) 484 485 h := New(999, 4) 486 n3 := []byte("h") 487 h.Add(n3) 488 489 j := New(1000, 5) 490 n4 := []byte("j") 491 j.Add(n4) 492 493 err := f.Merge(g) 494 if err != nil { 495 t.Errorf("There should be no error when merging two similar filters") 496 } 497 498 err = f.Merge(h) 499 if err == nil { 500 t.Errorf("There should be an error when merging filters with mismatched m") 501 } 502 503 err = f.Merge(j) 504 if err == nil { 505 t.Errorf("There should be an error when merging filters with mismatched k") 506 } 507 508 n2b := f.Test(n2) 509 if !n2b { 510 t.Errorf("The value doesn't exist after a valid merge") 511 } 512 513 n3b := f.Test(n3) 514 if n3b { 515 t.Errorf("The value exists after an invalid merge") 516 } 517 518 n4b := f.Test(n4) 519 if n4b { 520 t.Errorf("The value exists after an invalid merge") 521 } 522 } 523 524 func TestCopy(t *testing.T) { 525 f := New(1000, 4) 526 n1 := []byte("f") 527 f.Add(n1) 528 529 // copy here instead of New 530 g := f.Copy() 531 n2 := []byte("g") 532 g.Add(n2) 533 534 n1fb := f.Test(n1) 535 if !n1fb { 536 t.Errorf("The value doesn't exist in original after making a copy") 537 } 538 539 n1gb := g.Test(n1) 540 if !n1gb { 541 t.Errorf("The value doesn't exist in the copy") 542 } 543 544 n2fb := f.Test(n2) 545 if n2fb { 546 t.Errorf("The value exists in the original, it should only exist in copy") 547 } 548 549 n2gb := g.Test(n2) 550 if !n2gb { 551 t.Errorf("The value doesn't exist in copy after Add()") 552 } 553 } 554 555 func TestFrom(t *testing.T) { 556 var ( 557 k = uint(5) 558 data = make([]uint64, 10) 559 test = []byte("test") 560 ) 561 562 bf := From(data, k) 563 if bf.K() != k { 564 t.Errorf("Constant k does not match the expected value") 565 } 566 567 if bf.Cap() != uint(len(data)*64) { 568 t.Errorf("Capacity does not match the expected value") 569 } 570 571 if bf.Test(test) { 572 t.Errorf("Bloom filter should not contain the value") 573 } 574 575 bf.Add(test) 576 if !bf.Test(test) { 577 t.Errorf("Bloom filter should contain the value") 578 } 579 580 // create a new Bloom filter from an existing (populated) data slice. 581 bf = From(data, k) 582 if !bf.Test(test) { 583 t.Errorf("Bloom filter should contain the value") 584 } 585 } 586 587 func TestTestLocations(t *testing.T) { 588 f := NewWithEstimates(1000, 0.001) 589 n1 := []byte("Love") 590 n2 := []byte("is") 591 n3 := []byte("in") 592 n4 := []byte("bloom") 593 f.Add(n1) 594 n3a := f.TestLocations(Locations(n3, f.K())) 595 f.Add(n3) 596 n1b := f.TestLocations(Locations(n1, f.K())) 597 n2b := f.TestLocations(Locations(n2, f.K())) 598 n3b := f.TestLocations(Locations(n3, f.K())) 599 n4b := f.TestLocations(Locations(n4, f.K())) 600 if !n1b { 601 t.Errorf("%v should be in.", n1) 602 } 603 if n2b { 604 t.Errorf("%v should not be in.", n2) 605 } 606 if n3a { 607 t.Errorf("%v should not be in the first time we look.", n3) 608 } 609 if !n3b { 610 t.Errorf("%v should be in the second time we look.", n3) 611 } 612 if n4b { 613 t.Errorf("%v should be in.", n4) 614 } 615 } 616 617 func TestApproximatedSize(t *testing.T) { 618 f := NewWithEstimates(1000, 0.001) 619 f.Add([]byte("Love")) 620 f.Add([]byte("is")) 621 f.Add([]byte("in")) 622 f.Add([]byte("bloom")) 623 size := f.ApproximatedSize() 624 if size != 4 { 625 t.Errorf("%d should equal 4.", size) 626 } 627 } 628 629 func TestFPP(t *testing.T) { 630 f := NewWithEstimates(1000, 0.001) 631 for i := uint32(0); i < 1000; i++ { 632 n := make([]byte, 4) 633 binary.BigEndian.PutUint32(n, i) 634 f.Add(n) 635 } 636 count := 0 637 638 for i := uint32(0); i < 1000; i++ { 639 n := make([]byte, 4) 640 binary.BigEndian.PutUint32(n, i+1000) 641 if f.Test(n) { 642 count += 1 643 } 644 } 645 if float64(count)/1000.0 > 0.001 { 646 t.Errorf("Excessive fpp") 647 } 648 } 649 650 func TestEncodeDecodeBinary(t *testing.T) { 651 f := New(1000, 4) 652 f.Add([]byte("one")) 653 f.Add([]byte("two")) 654 f.Add([]byte("three")) 655 data, err := f.MarshalBinary() 656 if err != nil { 657 t.Fatal(err.Error()) 658 } 659 660 var g BloomFilter 661 err = g.UnmarshalBinary(data) 662 if err != nil { 663 t.Fatal(err.Error()) 664 } 665 if g.m != f.m { 666 t.Error("invalid m value") 667 } 668 if g.k != f.k { 669 t.Error("invalid k value") 670 } 671 if g.b == nil { 672 t.Fatal("bitset is nil") 673 } 674 if !g.b.Equal(f.b) { 675 t.Error("bitsets are not equal") 676 } 677 if !g.Test([]byte("three")) { 678 t.Errorf("missing value 'three'") 679 } 680 if !g.Test([]byte("two")) { 681 t.Errorf("missing value 'two'") 682 } 683 if !g.Test([]byte("one")) { 684 t.Errorf("missing value 'one'") 685 } 686 }