github.com/tursom/GoCollections@v0.3.10/util/bloom/Bloom_test.go (about)

     1  /*
     2   * Copyright (c) 2023 tursom. All rights reserved.
     3   * Use of this source code is governed by a GPL-3
     4   * license that can be found in the LICENSE file.
     5   */
     6  
     7  package bloom
     8  
     9  import (
    10  	"bytes"
    11  	"compress/gzip"
    12  	"fmt"
    13  	"math"
    14  	"testing"
    15  
    16  	"github.com/tursom/GoCollections/util/time"
    17  )
    18  
    19  func TestBloom_Contains(t *testing.T) {
    20  	bloom := NewBloom(100_0000, 0.1)
    21  	bloom.Add([]byte("hello, world!"))
    22  	for i := 0; i < 10000; i++ {
    23  		bloom.Add([]byte(fmt.Sprintf("%d", i)))
    24  	}
    25  
    26  	fmt.Println(len(bloom.m))
    27  	fmt.Println(len(gz(bloom.m.Bytes())))
    28  
    29  	if !bloom.Contains([]byte("hello, world!")) {
    30  		t.Failed()
    31  	}
    32  
    33  	if bloom.Contains([]byte("hello, not world!")) {
    34  		t.Failed()
    35  	}
    36  }
    37  
    38  func TestBloom_miss(t *testing.T) {
    39  	//HashFunc = func(data []byte, seed uint32) uint32 {
    40  	//	return murmur3.Sum32WithSeed(data, seed)
    41  	//	//	h1, _ := murmur3.Sum128WithSeed(data, seed)
    42  	//	//	return uint32(h1)
    43  	//}
    44  
    45  	var base uint = 1000_0000
    46  	bloom := NewBloom(base, 0.03)
    47  
    48  	t1 := time.Now()
    49  
    50  	for i := 0; i < int(base); i++ {
    51  		bloom.Add([]byte(fmt.Sprintf("%d", i)))
    52  	}
    53  
    54  	counter := make([]uint, 256)
    55  	for _, value := range bloom.m {
    56  		counter[value]++
    57  	}
    58  
    59  	miss := 0
    60  	for i := base; i < base*2; i++ {
    61  		if bloom.Contains([]byte(fmt.Sprintf("%d", i))) {
    62  			miss += 1
    63  		}
    64  	}
    65  
    66  	t2 := time.Now()
    67  
    68  	fmt.Println(miss, float64(miss)/float64(base))
    69  	fmt.Println(counter)
    70  
    71  	var H float64
    72  	for _, c := range counter {
    73  		if c == 0 {
    74  			continue
    75  		}
    76  		p := float64(c) / float64(len(bloom.m))
    77  		H += -p * math.Log2(p)
    78  	}
    79  	fmt.Println(H / 8)
    80  	fmt.Println(t2.Sub(t1))
    81  }
    82  
    83  func gz(b []byte) []byte {
    84  	buffer := bytes.NewBuffer(nil)
    85  
    86  	writer := gzip.NewWriter(buffer)
    87  	writer.Write(b)
    88  	writer.Flush()
    89  
    90  	return buffer.Bytes()
    91  }
    92  
    93  func TestCalcBitLength(t *testing.T) {
    94  	//fmt.Printf("%d\n", CalcBitLength(1024*1024*1024, 0.03)/8)
    95  	for i := 0; i < 63; i++ {
    96  		var n uint = 1 << i
    97  		numBytes := CalcBitLength(n, 0.1) / 8
    98  		fmt.Printf("%d: %d, %s / %s = %f\n",
    99  			i,
   100  			NumHashFunctions(n, numBytes*8),
   101  			storageFormat(numBytes),
   102  			storageFormat(n),
   103  			float64(numBytes)/float64(n))
   104  	}
   105  }
   106  
   107  func storageFormat(size uint) string {
   108  	var base uint = 1
   109  	if size < 1024*base {
   110  		return fmt.Sprintf("%db", size/base)
   111  	}
   112  
   113  	base *= 1024
   114  	if size < 1024*base {
   115  		return fmt.Sprintf("%fkb", float64(size)/float64(base))
   116  	}
   117  
   118  	base *= 1024
   119  	if size < 1024*base {
   120  		return fmt.Sprintf("%fmb", float64(size)/float64(base))
   121  	}
   122  
   123  	base *= 1024
   124  	if size < 1024*base {
   125  		return fmt.Sprintf("%fgb", float64(size)/float64(base))
   126  	}
   127  
   128  	base *= 1024
   129  	return fmt.Sprintf("%ftb", float64(size)/float64(base))
   130  }