github.com/tcotav/boggle@v0.0.0-20231023231124-a86497006536/wordlookup/dictionary.go (about)

     1  package wordlookup
     2  
     3  import (
     4  	"bufio"
     5  	"os"
     6  	"strings"
     7  	"unicode"
     8  
     9  	"github.com/dghubble/trie"
    10  	"github.com/rs/zerolog/log"
    11  )
    12  
// Dictionary is a set of words backed by a rune trie.
// A trie was chosen because prefix matching is a common use case;
// searches are O(k), where k is the length of the word.
type Dictionary struct {
	trie *trie.RuneTrie // holds the stored words; written by Add, read by Contains
}
    19  
    20  func NewDictionary() *Dictionary {
    21  	return &Dictionary{trie.NewRuneTrie()}
    22  }
    23  
    24  func NewDictionaryFromFile(path string) (*Dictionary, error) {
    25  	d := Dictionary{trie.NewRuneTrie()}
    26  	err := d.LoadFromFile(path)
    27  	return &d, err
    28  }
    29  
    30  // Add adds a word to the dictionary
    31  func (d *Dictionary) Add(word string) {
    32  	d.trie.Put(word, true)
    33  }
    34  
    35  func (d Dictionary) Contains(word string) bool {
    36  	val := d.trie.Get(word)
    37  	return val != nil
    38  }
    39  
    40  // isAlphaWord returns true if the word is all letters
    41  // utility function
    42  func isAlphaWord(word string) bool {
    43  	for _, r := range word {
    44  		if !unicode.IsLetter(r) {
    45  			return false
    46  		}
    47  	}
    48  	return true
    49  }
    50  
    51  // LoadFromFile loads a dictionary from a file
    52  // also does some data cleaning that arguably should be done elsewhere
    53  // as a preprocessing step prior to using it in the service as this delays startup
    54  func (d *Dictionary) LoadFromFile(path string) error {
    55  	// open file for reading
    56  	readfile, err := os.Open(path)
    57  	if err != nil {
    58  		return err
    59  	}
    60  	defer readfile.Close()
    61  
    62  	// read line by line
    63  	fileScanner := bufio.NewScanner(readfile)
    64  
    65  	// set the split function for fileScanner, specifically here to split on new lines
    66  	fileScanner.Split(bufio.ScanLines)
    67  	count := 0
    68  	for fileScanner.Scan() {
    69  		// add each line to the dictionary
    70  		word := fileScanner.Text()
    71  		// apply the boggle rules
    72  		// all words must be at least 3 characters long
    73  		if len(word) < 3 {
    74  			continue
    75  		}
    76  		// word list we used has some words with apostrophes, dashes, and numbers
    77  		// we only want to process words that are all letters
    78  		if isAlphaWord(word) {
    79  			// then lowercase the word as our word list has a mix
    80  			word = strings.ToLower(word)
    81  			count += 1
    82  			// and add it to the dictionary
    83  			d.Add(word)
    84  		}
    85  	}
    86  	log.Info().Int("count", count).Msg("Loaded dictionary")
    87  	return nil
    88  }