github.com/scottcagno/storage@v1.8.0/pkg/bloom/bloom_test.go (about)

     1  /*
     2   *
     3   *  * // Copyright (c) 2021 Scott Cagno. All rights reserved.
     4   *  * // The license can be found in the root of this project; see LICENSE.
     5   *
     6   */
     7  
     8  package bloom
     9  
    10  import (
    11  	"bytes"
    12  	_ "embed"
    13  	"fmt"
    14  	"github.com/scottcagno/storage/pkg/hashmap/openaddr"
    15  	"github.com/scottcagno/storage/pkg/util"
    16  	"log"
    17  	"runtime"
    18  	"strconv"
    19  	"testing"
    20  	"time"
    21  )
    22  
    23  var data = [11][]byte{
    24  	[]byte("key-000000"),
    25  	[]byte("Hendrix Avalos"),
    26  	[]byte("Yasmin Mellor"),
    27  	[]byte("Coco Mueller"),
    28  	[]byte("Bodhi Jimenez"),
    29  	[]byte("Seth Kinney"),
    30  	[]byte("Carla Le"),
    31  	[]byte("Kajus Spooner"),
    32  	[]byte("Javier Barrera"),
    33  	[]byte("Junaid O'Brien"),
    34  	[]byte("Emma Guest"),
    35  }
    36  
    37  var sizeMB = 1<<20 - 1
    38  
    39  func TestBoomFilterSize(t *testing.T) {
    40  	n := uint(1 * sizeMB)
    41  	bf := NewBloomFilter(n)
    42  
    43  	fmt.Printf("opening a bloom filter of size: %d\n", n)
    44  	fmt.Println("bloom filter:", util.Sizeof(bf))
    45  	fmt.Println("bloom count:", bf.Count())
    46  	fmt.Println("bloom size:", bf.Size())
    47  	fmt.Println("put foo")
    48  	bf.Set([]byte("foo"))
    49  	fmt.Println("has foo", bf.Has([]byte("foo")))
    50  	fmt.Println("has foo1", bf.Has([]byte("foo1")))
    51  	fmt.Println("has bar", bf.Has([]byte("bar")))
    52  	fmt.Println("del foo")
    53  	bf.Unset([]byte("foo"))
    54  	fmt.Println("has foo", bf.Has([]byte("foo")))
    55  }
    56  
    57  func TestBloomFilter(t *testing.T) {
    58  	// test new filter
    59  	bf := NewBloomFilter(1 << 12)
    60  	fmt.Println(util.Sizeof(bf))
    61  
    62  	// test adding data
    63  	for i := 0; i < len(data); i++ {
    64  		key := data[i]
    65  		bf.Set(key)
    66  		fmt.Printf("bf.Set(%q)\n", key)
    67  	}
    68  
    69  	// test checking data
    70  	for i := 0; i < len(data); i++ {
    71  		key := data[i]
    72  		ok := bf.Has(key)
    73  		fmt.Printf("bf.Has(%q): %v\n", key, ok)
    74  		if !ok {
    75  			t.Errorf("error: expected=%v, got=%v\n", true, ok)
    76  		}
    77  		key = []byte("key-000000_key_does_not_exist")
    78  		ok = bf.Has(key)
    79  		if ok {
    80  			t.Errorf("error: expected=%v, got=%v\n", false, ok)
    81  		}
    82  	}
    83  
    84  	for i := 0; i < len(data); i++ {
    85  		key := []byte("key-" + strconv.Itoa(i))
    86  		ok := bf.Has(key)
    87  		if ok {
    88  			t.Errorf("error: expected=%v, got=%v\n", false, ok)
    89  		}
    90  	}
    91  
    92  	// test unseting data
    93  	for i := 0; i < len(data); i++ {
    94  		key := data[i]
    95  		bf.Unset(key)
    96  		fmt.Printf("bf.Unset(%q)\n", key)
    97  	}
    98  
    99  	// test checking data
   100  	for i := 0; i < len(data); i++ {
   101  		key := data[i]
   102  		ok := bf.Has(key)
   103  		fmt.Printf("bf.Has(%q): %v\n", key, ok)
   104  		if ok {
   105  			t.Errorf("error: expected=%v, got=%v\n", false, ok)
   106  		}
   107  		key = []byte("key-000000_key_does_not_exist")
   108  		ok = bf.Has(key)
   109  		if ok {
   110  			t.Errorf("error: expected=%v, got=%v\n", false, ok)
   111  		}
   112  	}
   113  
   114  	for i := 0; i < len(data); i++ {
   115  		key := []byte("key-" + strconv.Itoa(i))
   116  		ok := bf.Has(key)
   117  		if ok {
   118  			t.Errorf("error: expected=%v, got=%v\n", false, ok)
   119  		}
   120  	}
   121  }
   122  
   123  func track(msg string) (string, time.Time) {
   124  	return msg, time.Now()
   125  }
   126  
   127  func duration(msg string, start time.Time) {
   128  	log.Printf("%v: %v\n", msg, time.Since(start))
   129  }
   130  
// b holds the contents of data.txt, embedded at build time by the go:embed
// directive below. TestBloomFilterVsHashMap splits it into a word list.
//
//go:embed data.txt
var b []byte
   133  
   134  func TestBloomFilterVsHashMap(t *testing.T) {
   135  	words := bytes.Split(b, []byte{'\r', '\n'})
   136  	bloomFilterTest(words...)
   137  	hashMaptest(words...)
   138  }
   139  
   140  func bloomFilterTest(data ...[]byte) {
   141  	bf := NewBloomFilter(16384)
   142  	ts1 := time.Now()
   143  	for i := 0; i < 10; i++ {
   144  		for _, word := range data {
   145  			key := fmt.Sprintf("%s-%d", word, i)
   146  			bf.Set([]byte(key))
   147  		}
   148  	}
   149  	ts2 := time.Since(ts1)
   150  	size := util.Sizeof(bf)
   151  	fmt.Printf(">> bloom filter size estimate %dB -> %dKB -> %dMB\n", size, size/1024, size/1024/1024)
   152  	fmt.Println(ts2)
   153  	bf = nil
   154  	runtime.GC()
   155  }
   156  
   157  func hashMaptest(data ...[]byte) {
   158  	hm := openaddr.NewHashMap(16384)
   159  	ts1 := time.Now()
   160  	for i := 0; i < 10; i++ {
   161  		for _, word := range data {
   162  			key := fmt.Sprintf("%s-%d", word, i)
   163  			hm.Set(key, []byte(key))
   164  		}
   165  	}
   166  	ts2 := time.Since(ts1)
   167  	size := util.Sizeof(hm)
   168  	fmt.Printf(">> hashmap size estimate %dB -> %dKB -> %dMB\n", size, size/1024, size/1024/1024)
   169  	fmt.Println(ts2)
   170  	hm = nil
   171  	runtime.GC()
   172  }