github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/wordsearch/main.go (about)

     1  package main
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"math"
     7  	"os"
     8  	"strings"
     9  	"time"
    10  	"unicode/utf8"
    11  
    12  	"github.com/dgryski/go-cuckoof"
    13  	"github.com/egonelbre/exp/wordsearch/trie-compact"
    14  	"github.com/loov/hrtime"
    15  )
    16  
    17  func main() {
    18  	root := trie.Uncompact{}
    19  
    20  	r, err := os.Open("enable1.txt")
    21  	if err != nil {
    22  		panic(err)
    23  	}
    24  
    25  	var words []string
    26  	wordssize := 0
    27  
    28  	sc := bufio.NewScanner(r)
    29  	for sc.Scan() {
    30  		line := strings.TrimSpace(sc.Text())
    31  		if line == "" || utf8.RuneCountInString(line) == 1 {
    32  			continue
    33  		}
    34  		words = append(words, line)
    35  		wordssize += len(line)
    36  		root.Insert(line)
    37  	}
    38  
    39  	compact := root.Compress()
    40  	fmt.Printf("serialized %d bytes\n", compact.Size())
    41  	fmt.Printf("%.1f bytes average word\n", float64(wordssize)/float64(len(words)))
    42  	fmt.Printf("%.1f bytes per word\n", float64(compact.Size())/float64(len(words)))
    43  	fmt.Printf("%.1f bytes per key\n", math.Log2(float64(compact.NodeCount()))/8)
    44  
    45  	start := hrtime.Now()
    46  	for _, word := range words {
    47  		if !compact.Contains(word) {
    48  			fmt.Println("did not find", word)
    49  			break
    50  		}
    51  	}
    52  	stop := hrtime.Now()
    53  	fmt.Printf("average lookup: %v\n", (stop-start)/time.Duration(len(words)))
    54  
    55  	fmt.Println(compact.Contains("something"))
    56  	fmt.Println(compact.Contains("NOTHING"))
    57  
    58  	BenchmarkBinarySearch(words)
    59  	BenchmarkCuckooFilter(words)
    60  }
    61  
    62  func BenchmarkBinarySearch(words []string) {
    63  	start := hrtime.Now()
    64  	for _, word := range words {
    65  		_ = Search(words, word)
    66  	}
    67  	stop := hrtime.Now()
    68  	fmt.Printf("average binary search lookup: %v\n", (stop-start)/time.Duration(len(words)))
    69  }
    70  
    71  func Search(words []string, word string) int {
    72  	i, k := 0, len(words)
    73  	for i < k {
    74  		h := int(uint(i+k) >> 1)
    75  		if !(words[h] >= word) {
    76  			i = h + 1
    77  		} else {
    78  			k = h
    79  		}
    80  	}
    81  	return i
    82  }
    83  
    84  func BenchmarkCuckooFilter(words []string) {
    85  	filter := cuckoof.New(1 << 19)
    86  	for _, word := range words {
    87  		filter.Insert([]byte(word))
    88  	}
    89  
    90  	start := hrtime.Now()
    91  	for _, word := range words {
    92  		if !filter.Lookup([]byte(word)) {
    93  			fmt.Println("did not find", word)
    94  			break
    95  		}
    96  	}
    97  	stop := hrtime.Now()
    98  	fmt.Printf("cuckoo search lookup: %v\n", (stop-start)/time.Duration(len(words)))
    99  }