github.com/tursom/GoCollections@v0.3.10/util/bloom/Bloom_test.go (about) 1 /* 2 * Copyright (c) 2023 tursom. All rights reserved. 3 * Use of this source code is governed by a GPL-3 4 * license that can be found in the LICENSE file. 5 */ 6 7 package bloom 8 9 import ( 10 "bytes" 11 "compress/gzip" 12 "fmt" 13 "math" 14 "testing" 15 16 "github.com/tursom/GoCollections/util/time" 17 ) 18 19 func TestBloom_Contains(t *testing.T) { 20 bloom := NewBloom(100_0000, 0.1) 21 bloom.Add([]byte("hello, world!")) 22 for i := 0; i < 10000; i++ { 23 bloom.Add([]byte(fmt.Sprintf("%d", i))) 24 } 25 26 fmt.Println(len(bloom.m)) 27 fmt.Println(len(gz(bloom.m.Bytes()))) 28 29 if !bloom.Contains([]byte("hello, world!")) { 30 t.Failed() 31 } 32 33 if bloom.Contains([]byte("hello, not world!")) { 34 t.Failed() 35 } 36 } 37 38 func TestBloom_miss(t *testing.T) { 39 //HashFunc = func(data []byte, seed uint32) uint32 { 40 // return murmur3.Sum32WithSeed(data, seed) 41 // // h1, _ := murmur3.Sum128WithSeed(data, seed) 42 // // return uint32(h1) 43 //} 44 45 var base uint = 1000_0000 46 bloom := NewBloom(base, 0.03) 47 48 t1 := time.Now() 49 50 for i := 0; i < int(base); i++ { 51 bloom.Add([]byte(fmt.Sprintf("%d", i))) 52 } 53 54 counter := make([]uint, 256) 55 for _, value := range bloom.m { 56 counter[value]++ 57 } 58 59 miss := 0 60 for i := base; i < base*2; i++ { 61 if bloom.Contains([]byte(fmt.Sprintf("%d", i))) { 62 miss += 1 63 } 64 } 65 66 t2 := time.Now() 67 68 fmt.Println(miss, float64(miss)/float64(base)) 69 fmt.Println(counter) 70 71 var H float64 72 for _, c := range counter { 73 if c == 0 { 74 continue 75 } 76 p := float64(c) / float64(len(bloom.m)) 77 H += -p * math.Log2(p) 78 } 79 fmt.Println(H / 8) 80 fmt.Println(t2.Sub(t1)) 81 } 82 83 func gz(b []byte) []byte { 84 buffer := bytes.NewBuffer(nil) 85 86 writer := gzip.NewWriter(buffer) 87 writer.Write(b) 88 writer.Flush() 89 90 return buffer.Bytes() 91 } 92 93 func TestCalcBitLength(t *testing.T) { 94 //fmt.Printf("%d\n", CalcBitLength(1024*1024*1024, 0.03)/8) 95 for i := 0; i < 63; i++ { 96 var n uint = 1 << i 97 numBytes := CalcBitLength(n, 0.1) / 8 98 fmt.Printf("%d: %d, %s / %s = %f\n", 99 i, 100 NumHashFunctions(n, numBytes*8), 101 storageFormat(numBytes), 102 storageFormat(n), 103 float64(numBytes)/float64(n)) 104 } 105 } 106 107 func storageFormat(size uint) string { 108 var base uint = 1 109 if size < 1024*base { 110 return fmt.Sprintf("%db", size/base) 111 } 112 113 base *= 1024 114 if size < 1024*base { 115 return fmt.Sprintf("%fkb", float64(size)/float64(base)) 116 } 117 118 base *= 1024 119 if size < 1024*base { 120 return fmt.Sprintf("%fmb", float64(size)/float64(base)) 121 } 122 123 base *= 1024 124 if size < 1024*base { 125 return fmt.Sprintf("%fgb", float64(size)/float64(base)) 126 } 127 128 base *= 1024 129 return fmt.Sprintf("%ftb", float64(size)/float64(base)) 130 }