github.com/xyproto/orbiton/v2@v2.65.12-0.20240516144430-e10a419274ec/spellcheck.go (about)

     1  package main
     2  
     3  import (
     4  	_ "embed"
     5  	"errors"
     6  	"regexp"
     7  	"strings"
     8  
     9  	"github.com/sajari/fuzzy"
    10  	"github.com/xyproto/vt100"
    11  )
    12  
    13  // how far away a word can be from the corrected word that the spellchecker suggests
    14  const fuzzyDepth = 2
    15  
    16  var (
    17  	//go:embed words_en.txt.gz
    18  	gzwords []byte
    19  
    20  	spellChecker *SpellChecker
    21  
    22  	errFoundNoTypos = errors.New("found no typos")
    23  	wordRegexp      = regexp.MustCompile(`(?:%2[A-Z])?([a-zA-Z0-9]+)`) // avoid capturing "%2F" and "%2B", other than that, capture English words
    24  
    25  	dontSuggest = []string{"urine"}
    26  )
    27  
    28  // SpellChecker is a slice of correct, custom and ignored words together with a *fuzzy.Model
    29  type SpellChecker struct {
    30  	fuzzyModel   *fuzzy.Model
    31  	markedWord   string
    32  	correctWords []string
    33  	customWords  []string
    34  	ignoredWords []string
    35  }
    36  
    37  // NewSpellChecker creates and initializes a new *SpellChecker.
    38  // The embedded English word list is used to train the *fuzzy.Model.
    39  func NewSpellChecker() (*SpellChecker, error) {
    40  	var sc SpellChecker
    41  
    42  	sc.customWords = make([]string, 0)
    43  	sc.ignoredWords = make([]string, 0)
    44  
    45  	wordData, err := gUnzipData(gzwords)
    46  	if err != nil {
    47  		return nil, err
    48  	}
    49  	sc.correctWords = strings.Fields(string(wordData))
    50  
    51  	sc.Train(false) // training for the first time, not re-training
    52  
    53  	return &sc, nil
    54  }
    55  
    56  // Train will train or re-train the current spellChecker.fuzzyModel, by using the current SpellChecker word slices
    57  func (sc *SpellChecker) Train(reTrain bool) {
    58  	if reTrain || sc.fuzzyModel == nil {
    59  
    60  		// Initialize the spellchecker
    61  		sc.fuzzyModel = fuzzy.NewModel()
    62  
    63  		// This expands the distance searched, but costs more resources (memory and time).
    64  		// For spell checking, "2" is typically enough, for query suggestions this can be higher
    65  		sc.fuzzyModel.SetDepth(fuzzyDepth)
    66  
    67  		lenCorrect := len(sc.correctWords)
    68  		lenCustom := len(sc.customWords)
    69  
    70  		trainWords := make([]string, lenCorrect+lenCustom) // initialize with enough capacity
    71  
    72  		var word string
    73  
    74  		for i := 0; i < lenCorrect; i++ {
    75  			word := sc.correctWords[i]
    76  			if !hasS(sc.ignoredWords, word) {
    77  				trainWords = append(trainWords, word)
    78  			}
    79  		}
    80  
    81  		for i := 0; i < lenCustom; i++ {
    82  			word = sc.customWords[i]
    83  			if !hasS(sc.ignoredWords, word) {
    84  				trainWords = append(trainWords, word)
    85  			}
    86  		}
    87  
    88  		// Train multiple words simultaneously by passing an array of strings to the "Train" function
    89  		sc.fuzzyModel.Train(trainWords)
    90  	}
    91  }
    92  
    93  // CurrentSpellCheckWord returns the currently marked spell check word
    94  func (e *Editor) CurrentSpellCheckWord() string {
    95  	if spellChecker == nil {
    96  		return ""
    97  	}
    98  	return spellChecker.markedWord
    99  }
   100  
   101  // AddCurrentWordToWordList will attempt to add the word at the cursor to the spellcheck word list
   102  func (e *Editor) AddCurrentWordToWordList() string {
   103  	if spellChecker == nil {
   104  		newSpellChecker, err := NewSpellChecker()
   105  		if err != nil {
   106  			return ""
   107  		}
   108  		spellChecker = newSpellChecker
   109  	}
   110  
   111  	var word string
   112  	matches := wordRegexp.FindStringSubmatch(e.CurrentSpellCheckWord())
   113  	if len(matches) > 1 { // Ensure that there's a captured group
   114  		word = matches[1] // The captured word is in the second item of the slice
   115  	}
   116  
   117  	if hasS(spellChecker.customWords, word) || hasS(spellChecker.correctWords, word) { // already has this word
   118  		return word
   119  	}
   120  
   121  	spellChecker.customWords = append(spellChecker.customWords, word)
   122  
   123  	// Add the word
   124  	spellChecker.fuzzyModel.TrainWord(word)
   125  
   126  	return word
   127  }
   128  
   129  // RemoveCurrentWordFromWordList will attempt to add the word at the cursor to the spellcheck word list
   130  func (e *Editor) RemoveCurrentWordFromWordList() string {
   131  	if spellChecker == nil {
   132  		newSpellChecker, err := NewSpellChecker()
   133  		if err != nil {
   134  			return ""
   135  		}
   136  		spellChecker = newSpellChecker
   137  	}
   138  
   139  	var word string
   140  	matches := wordRegexp.FindStringSubmatch(e.CurrentSpellCheckWord())
   141  	if len(matches) > 1 { // Ensure that there's a captured group
   142  		word = matches[1] // The captured word is in the second item of the slice
   143  	}
   144  
   145  	if hasS(spellChecker.ignoredWords, word) { // already has this word
   146  		return word
   147  	}
   148  	spellChecker.ignoredWords = append(spellChecker.ignoredWords, word)
   149  
   150  	spellChecker.Train(true) // re-train
   151  
   152  	return word
   153  }
   154  
   155  // SearchForTypo returns the first misspelled word in the document (as defined by the dictionary),
   156  // or an empty string. The second returned string is what the word could be if it was corrected.
   157  func (e *Editor) SearchForTypo() (string, string, error) {
   158  	if spellChecker == nil {
   159  		newSpellChecker, err := NewSpellChecker()
   160  		if err != nil {
   161  			return "", "", err
   162  		}
   163  		spellChecker = newSpellChecker
   164  	}
   165  	e.spellCheckMode = true
   166  	spellChecker.markedWord = ""
   167  
   168  	// Use the regular expression to find all the words
   169  	words := wordRegexp.FindAllString(e.String(), -1)
   170  
   171  	// Now spellcheck all the words
   172  	for _, word := range words {
   173  		justTheWord := strings.TrimSpace(word)
   174  		if justTheWord == "" {
   175  			continue
   176  		}
   177  		if hasS(spellChecker.ignoredWords, justTheWord) || hasS(spellChecker.customWords, justTheWord) { // || hasS(spellChecker.correctWords, justTheWord) {
   178  			continue
   179  		}
   180  
   181  		lower := strings.ToLower(justTheWord)
   182  
   183  		if hasS(spellChecker.ignoredWords, lower) || hasS(spellChecker.customWords, lower) { // || hasS(spellChecker.correctWords, lower) {
   184  			continue
   185  		}
   186  
   187  		corrected := spellChecker.fuzzyModel.SpellCheck(justTheWord)
   188  		if !strings.EqualFold(justTheWord, corrected) && corrected != "" && !hasS(dontSuggest, corrected) { // case insensitive comparison of the original and spell-check-suggested word
   189  			spellChecker.markedWord = justTheWord
   190  			return justTheWord, corrected, nil
   191  		}
   192  	}
   193  	return "", "", errFoundNoTypos
   194  }
   195  
   196  // NanoNextTypo tries to jump to the next typo
   197  func (e *Editor) NanoNextTypo(c *vt100.Canvas, status *StatusBar) (string, string) {
   198  	if typo, corrected, err := e.SearchForTypo(); err == nil || err == errFoundNoTypos {
   199  		e.redraw = true
   200  		e.redrawCursor = true
   201  		if err == errFoundNoTypos || typo == "" {
   202  			status.ClearAll(c)
   203  			status.SetMessage("No typos found")
   204  			status.Show(c, e)
   205  			e.spellCheckMode = false
   206  			e.ClearSearch()
   207  			return "", ""
   208  		}
   209  		e.SetSearchTerm(c, status, typo, true) // true for spellCheckMode
   210  		if err := e.GoToNextMatch(c, status, true, true); err == errNoSearchMatch {
   211  			status.ClearAll(c)
   212  			status.SetMessage("No typos found")
   213  			status.Show(c, e)
   214  			e.spellCheckMode = false
   215  			e.ClearSearch()
   216  			return "", ""
   217  		}
   218  		if typo != "" && corrected != "" {
   219  			status.ClearAll(c)
   220  			status.SetMessage(typo + " could be " + corrected)
   221  			status.Show(c, e)
   222  		}
   223  		return typo, corrected
   224  	}
   225  	return "", ""
   226  }