github.com/scottcagno/storage@v1.8.0/pkg/bloom/bloom_test.go (about) 1 /* 2 * 3 * * // Copyright (c) 2021 Scott Cagno. All rights reserved. 4 * * // The license can be found in the root of this project; see LICENSE. 5 * 6 */ 7 8 package bloom 9 10 import ( 11 "bytes" 12 _ "embed" 13 "fmt" 14 "github.com/scottcagno/storage/pkg/hashmap/openaddr" 15 "github.com/scottcagno/storage/pkg/util" 16 "log" 17 "runtime" 18 "strconv" 19 "testing" 20 "time" 21 ) 22 23 var data = [11][]byte{ 24 []byte("key-000000"), 25 []byte("Hendrix Avalos"), 26 []byte("Yasmin Mellor"), 27 []byte("Coco Mueller"), 28 []byte("Bodhi Jimenez"), 29 []byte("Seth Kinney"), 30 []byte("Carla Le"), 31 []byte("Kajus Spooner"), 32 []byte("Javier Barrera"), 33 []byte("Junaid O'Brien"), 34 []byte("Emma Guest"), 35 } 36 37 var sizeMB = 1<<20 - 1 38 39 func TestBoomFilterSize(t *testing.T) { 40 n := uint(1 * sizeMB) 41 bf := NewBloomFilter(n) 42 43 fmt.Printf("opening a bloom filter of size: %d\n", n) 44 fmt.Println("bloom filter:", util.Sizeof(bf)) 45 fmt.Println("bloom count:", bf.Count()) 46 fmt.Println("bloom size:", bf.Size()) 47 fmt.Println("put foo") 48 bf.Set([]byte("foo")) 49 fmt.Println("has foo", bf.Has([]byte("foo"))) 50 fmt.Println("has foo1", bf.Has([]byte("foo1"))) 51 fmt.Println("has bar", bf.Has([]byte("bar"))) 52 fmt.Println("del foo") 53 bf.Unset([]byte("foo")) 54 fmt.Println("has foo", bf.Has([]byte("foo"))) 55 } 56 57 func TestBloomFilter(t *testing.T) { 58 // test new filter 59 bf := NewBloomFilter(1 << 12) 60 fmt.Println(util.Sizeof(bf)) 61 62 // test adding data 63 for i := 0; i < len(data); i++ { 64 key := data[i] 65 bf.Set(key) 66 fmt.Printf("bf.Set(%q)\n", key) 67 } 68 69 // test checking data 70 for i := 0; i < len(data); i++ { 71 key := data[i] 72 ok := bf.Has(key) 73 fmt.Printf("bf.Has(%q): %v\n", key, ok) 74 if !ok { 75 t.Errorf("error: expected=%v, got=%v\n", true, ok) 76 } 77 key = []byte("key-000000_key_does_not_exist") 78 ok = bf.Has(key) 79 if ok { 80 t.Errorf("error: expected=%v, got=%v\n", false, ok) 81 } 82 } 83 84 for i := 0; i < len(data); i++ { 85 key := []byte("key-" + strconv.Itoa(i)) 86 ok := bf.Has(key) 87 if ok { 88 t.Errorf("error: expected=%v, got=%v\n", false, ok) 89 } 90 } 91 92 // test unseting data 93 for i := 0; i < len(data); i++ { 94 key := data[i] 95 bf.Unset(key) 96 fmt.Printf("bf.Unset(%q)\n", key) 97 } 98 99 // test checking data 100 for i := 0; i < len(data); i++ { 101 key := data[i] 102 ok := bf.Has(key) 103 fmt.Printf("bf.Has(%q): %v\n", key, ok) 104 if ok { 105 t.Errorf("error: expected=%v, got=%v\n", false, ok) 106 } 107 key = []byte("key-000000_key_does_not_exist") 108 ok = bf.Has(key) 109 if ok { 110 t.Errorf("error: expected=%v, got=%v\n", false, ok) 111 } 112 } 113 114 for i := 0; i < len(data); i++ { 115 key := []byte("key-" + strconv.Itoa(i)) 116 ok := bf.Has(key) 117 if ok { 118 t.Errorf("error: expected=%v, got=%v\n", false, ok) 119 } 120 } 121 } 122 123 func track(msg string) (string, time.Time) { 124 return msg, time.Now() 125 } 126 127 func duration(msg string, start time.Time) { 128 log.Printf("%v: %v\n", msg, time.Since(start)) 129 } 130 131 //go:embed data.txt 132 var b []byte 133 134 func TestBloomFilterVsHashMap(t *testing.T) { 135 words := bytes.Split(b, []byte{'\r', '\n'}) 136 bloomFilterTest(words...) 137 hashMaptest(words...) 138 } 139 140 func bloomFilterTest(data ...[]byte) { 141 bf := NewBloomFilter(16384) 142 ts1 := time.Now() 143 for i := 0; i < 10; i++ { 144 for _, word := range data { 145 key := fmt.Sprintf("%s-%d", word, i) 146 bf.Set([]byte(key)) 147 } 148 } 149 ts2 := time.Since(ts1) 150 size := util.Sizeof(bf) 151 fmt.Printf(">> bloom filter size estimate %dB -> %dKB -> %dMB\n", size, size/1024, size/1024/1024) 152 fmt.Println(ts2) 153 bf = nil 154 runtime.GC() 155 } 156 157 func hashMaptest(data ...[]byte) { 158 hm := openaddr.NewHashMap(16384) 159 ts1 := time.Now() 160 for i := 0; i < 10; i++ { 161 for _, word := range data { 162 key := fmt.Sprintf("%s-%d", word, i) 163 hm.Set(key, []byte(key)) 164 } 165 } 166 ts2 := time.Since(ts1) 167 size := util.Sizeof(hm) 168 fmt.Printf(">> hashmap size estimate %dB -> %dKB -> %dMB\n", size, size/1024, size/1024/1024) 169 fmt.Println(ts2) 170 hm = nil 171 runtime.GC() 172 }