github.com/egonelbre/exp@v0.0.0-20240430123955-ed1d3aa93911/wordsearch/main.go (about) 1 package main 2 3 import ( 4 "bufio" 5 "fmt" 6 "math" 7 "os" 8 "strings" 9 "time" 10 "unicode/utf8" 11 12 "github.com/dgryski/go-cuckoof" 13 "github.com/egonelbre/exp/wordsearch/trie-compact" 14 "github.com/loov/hrtime" 15 ) 16 17 func main() { 18 root := trie.Uncompact{} 19 20 r, err := os.Open("enable1.txt") 21 if err != nil { 22 panic(err) 23 } 24 25 var words []string 26 wordssize := 0 27 28 sc := bufio.NewScanner(r) 29 for sc.Scan() { 30 line := strings.TrimSpace(sc.Text()) 31 if line == "" || utf8.RuneCountInString(line) == 1 { 32 continue 33 } 34 words = append(words, line) 35 wordssize += len(line) 36 root.Insert(line) 37 } 38 39 compact := root.Compress() 40 fmt.Printf("serialized %d bytes\n", compact.Size()) 41 fmt.Printf("%.1f bytes average word\n", float64(wordssize)/float64(len(words))) 42 fmt.Printf("%.1f bytes per word\n", float64(compact.Size())/float64(len(words))) 43 fmt.Printf("%.1f bytes per key\n", math.Log2(float64(compact.NodeCount()))/8) 44 45 start := hrtime.Now() 46 for _, word := range words { 47 if !compact.Contains(word) { 48 fmt.Println("did not find", word) 49 break 50 } 51 } 52 stop := hrtime.Now() 53 fmt.Printf("average lookup: %v\n", (stop-start)/time.Duration(len(words))) 54 55 fmt.Println(compact.Contains("something")) 56 fmt.Println(compact.Contains("NOTHING")) 57 58 BenchmarkBinarySearch(words) 59 BenchmarkCuckooFilter(words) 60 } 61 62 func BenchmarkBinarySearch(words []string) { 63 start := hrtime.Now() 64 for _, word := range words { 65 _ = Search(words, word) 66 } 67 stop := hrtime.Now() 68 fmt.Printf("average binary search lookup: %v\n", (stop-start)/time.Duration(len(words))) 69 } 70 71 func Search(words []string, word string) int { 72 i, k := 0, len(words) 73 for i < k { 74 h := int(uint(i+k) >> 1) 75 if !(words[h] >= word) { 76 i = h + 1 77 } else { 78 k = h 79 } 80 } 81 return i 82 } 83 84 func BenchmarkCuckooFilter(words []string) { 85 filter := cuckoof.New(1 << 19) 86 for _, word := range words { 87 filter.Insert([]byte(word)) 88 } 89 90 start := hrtime.Now() 91 for _, word := range words { 92 if !filter.Lookup([]byte(word)) { 93 fmt.Println("did not find", word) 94 break 95 } 96 } 97 stop := hrtime.Now() 98 fmt.Printf("cuckoo search lookup: %v\n", (stop-start)/time.Duration(len(words))) 99 }