github.com/xyproto/orbiton/v2@v2.65.12-0.20240516144430-e10a419274ec/spellcheck.go (about) 1 package main 2 3 import ( 4 _ "embed" 5 "errors" 6 "regexp" 7 "strings" 8 9 "github.com/sajari/fuzzy" 10 "github.com/xyproto/vt100" 11 ) 12 13 // how far away a word can be from the corrected word that the spellchecker suggests 14 const fuzzyDepth = 2 15 16 var ( 17 //go:embed words_en.txt.gz 18 gzwords []byte 19 20 spellChecker *SpellChecker 21 22 errFoundNoTypos = errors.New("found no typos") 23 wordRegexp = regexp.MustCompile(`(?:%2[A-Z])?([a-zA-Z0-9]+)`) // avoid capturing "%2F" and "%2B", other than that, capture English words 24 25 dontSuggest = []string{"urine"} 26 ) 27 28 // SpellChecker is a slice of correct, custom and ignored words together with a *fuzzy.Model 29 type SpellChecker struct { 30 fuzzyModel *fuzzy.Model 31 markedWord string 32 correctWords []string 33 customWords []string 34 ignoredWords []string 35 } 36 37 // NewSpellChecker creates and initializes a new *SpellChecker. 38 // The embedded English word list is used to train the *fuzzy.Model. 39 func NewSpellChecker() (*SpellChecker, error) { 40 var sc SpellChecker 41 42 sc.customWords = make([]string, 0) 43 sc.ignoredWords = make([]string, 0) 44 45 wordData, err := gUnzipData(gzwords) 46 if err != nil { 47 return nil, err 48 } 49 sc.correctWords = strings.Fields(string(wordData)) 50 51 sc.Train(false) // training for the first time, not re-training 52 53 return &sc, nil 54 } 55 56 // Train will train or re-train the current spellChecker.fuzzyModel, by using the current SpellChecker word slices 57 func (sc *SpellChecker) Train(reTrain bool) { 58 if reTrain || sc.fuzzyModel == nil { 59 60 // Initialize the spellchecker 61 sc.fuzzyModel = fuzzy.NewModel() 62 63 // This expands the distance searched, but costs more resources (memory and time). 64 // For spell checking, "2" is typically enough, for query suggestions this can be higher 65 sc.fuzzyModel.SetDepth(fuzzyDepth) 66 67 lenCorrect := len(sc.correctWords) 68 lenCustom := len(sc.customWords) 69 70 trainWords := make([]string, lenCorrect+lenCustom) // initialize with enough capacity 71 72 var word string 73 74 for i := 0; i < lenCorrect; i++ { 75 word := sc.correctWords[i] 76 if !hasS(sc.ignoredWords, word) { 77 trainWords = append(trainWords, word) 78 } 79 } 80 81 for i := 0; i < lenCustom; i++ { 82 word = sc.customWords[i] 83 if !hasS(sc.ignoredWords, word) { 84 trainWords = append(trainWords, word) 85 } 86 } 87 88 // Train multiple words simultaneously by passing an array of strings to the "Train" function 89 sc.fuzzyModel.Train(trainWords) 90 } 91 } 92 93 // CurrentSpellCheckWord returns the currently marked spell check word 94 func (e *Editor) CurrentSpellCheckWord() string { 95 if spellChecker == nil { 96 return "" 97 } 98 return spellChecker.markedWord 99 } 100 101 // AddCurrentWordToWordList will attempt to add the word at the cursor to the spellcheck word list 102 func (e *Editor) AddCurrentWordToWordList() string { 103 if spellChecker == nil { 104 newSpellChecker, err := NewSpellChecker() 105 if err != nil { 106 return "" 107 } 108 spellChecker = newSpellChecker 109 } 110 111 var word string 112 matches := wordRegexp.FindStringSubmatch(e.CurrentSpellCheckWord()) 113 if len(matches) > 1 { // Ensure that there's a captured group 114 word = matches[1] // The captured word is in the second item of the slice 115 } 116 117 if hasS(spellChecker.customWords, word) || hasS(spellChecker.correctWords, word) { // already has this word 118 return word 119 } 120 121 spellChecker.customWords = append(spellChecker.customWords, word) 122 123 // Add the word 124 spellChecker.fuzzyModel.TrainWord(word) 125 126 return word 127 } 128 129 // RemoveCurrentWordFromWordList will attempt to add the word at the cursor to the spellcheck word list 130 func (e *Editor) RemoveCurrentWordFromWordList() string { 131 if spellChecker == nil { 132 newSpellChecker, err := NewSpellChecker() 133 if err != nil { 134 return "" 135 } 136 spellChecker = newSpellChecker 137 } 138 139 var word string 140 matches := wordRegexp.FindStringSubmatch(e.CurrentSpellCheckWord()) 141 if len(matches) > 1 { // Ensure that there's a captured group 142 word = matches[1] // The captured word is in the second item of the slice 143 } 144 145 if hasS(spellChecker.ignoredWords, word) { // already has this word 146 return word 147 } 148 spellChecker.ignoredWords = append(spellChecker.ignoredWords, word) 149 150 spellChecker.Train(true) // re-train 151 152 return word 153 } 154 155 // SearchForTypo returns the first misspelled word in the document (as defined by the dictionary), 156 // or an empty string. The second returned string is what the word could be if it was corrected. 157 func (e *Editor) SearchForTypo() (string, string, error) { 158 if spellChecker == nil { 159 newSpellChecker, err := NewSpellChecker() 160 if err != nil { 161 return "", "", err 162 } 163 spellChecker = newSpellChecker 164 } 165 e.spellCheckMode = true 166 spellChecker.markedWord = "" 167 168 // Use the regular expression to find all the words 169 words := wordRegexp.FindAllString(e.String(), -1) 170 171 // Now spellcheck all the words 172 for _, word := range words { 173 justTheWord := strings.TrimSpace(word) 174 if justTheWord == "" { 175 continue 176 } 177 if hasS(spellChecker.ignoredWords, justTheWord) || hasS(spellChecker.customWords, justTheWord) { // || hasS(spellChecker.correctWords, justTheWord) { 178 continue 179 } 180 181 lower := strings.ToLower(justTheWord) 182 183 if hasS(spellChecker.ignoredWords, lower) || hasS(spellChecker.customWords, lower) { // || hasS(spellChecker.correctWords, lower) { 184 continue 185 } 186 187 corrected := spellChecker.fuzzyModel.SpellCheck(justTheWord) 188 if !strings.EqualFold(justTheWord, corrected) && corrected != "" && !hasS(dontSuggest, corrected) { // case insensitive comparison of the original and spell-check-suggested word 189 spellChecker.markedWord = justTheWord 190 return justTheWord, corrected, nil 191 } 192 } 193 return "", "", errFoundNoTypos 194 } 195 196 // NanoNextTypo tries to jump to the next typo 197 func (e *Editor) NanoNextTypo(c *vt100.Canvas, status *StatusBar) (string, string) { 198 if typo, corrected, err := e.SearchForTypo(); err == nil || err == errFoundNoTypos { 199 e.redraw = true 200 e.redrawCursor = true 201 if err == errFoundNoTypos || typo == "" { 202 status.ClearAll(c) 203 status.SetMessage("No typos found") 204 status.Show(c, e) 205 e.spellCheckMode = false 206 e.ClearSearch() 207 return "", "" 208 } 209 e.SetSearchTerm(c, status, typo, true) // true for spellCheckMode 210 if err := e.GoToNextMatch(c, status, true, true); err == errNoSearchMatch { 211 status.ClearAll(c) 212 status.SetMessage("No typos found") 213 status.Show(c, e) 214 e.spellCheckMode = false 215 e.ClearSearch() 216 return "", "" 217 } 218 if typo != "" && corrected != "" { 219 status.ClearAll(c) 220 status.SetMessage(typo + " could be " + corrected) 221 status.Show(c, e) 222 } 223 return typo, corrected 224 } 225 return "", "" 226 }