pkg.re/essentialkaos/ek.10@v12.41.0+incompatible/spellcheck/spellcheck.go (about)

     1  // Package spellcheck provides spellcheck based on Damerau–Levenshtein distance algorithm
     2  package spellcheck
     3  
     4  // ////////////////////////////////////////////////////////////////////////////////// //
     5  //                                                                                    //
     6  //                         Copyright (c) 2022 ESSENTIAL KAOS                          //
     7  //      Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0>     //
     8  //                                                                                    //
     9  // ////////////////////////////////////////////////////////////////////////////////// //
    10  
    11  import (
    12  	"sort"
    13  	"strings"
    14  
    15  	"pkg.re/essentialkaos/ek.v12/mathutil"
    16  )
    17  
    18  // ////////////////////////////////////////////////////////////////////////////////// //
    19  
// Model is a spellcheck model which holds the terms used for
// corrections and suggestions
type Model struct {
	terms []string // unique words the model was trained on
}
    24  
    25  // ////////////////////////////////////////////////////////////////////////////////// //
    26  
    27  type suggestItem struct {
    28  	term  string
    29  	score int
    30  }
    31  
    32  type suggestItems []*suggestItem
    33  
    34  func (s suggestItems) Len() int {
    35  	return len(s)
    36  }
    37  
    38  func (s suggestItems) Less(i, j int) bool {
    39  	return s[i].score < s[j].score
    40  }
    41  
    42  func (s suggestItems) Swap(i, j int) {
    43  	s[i], s[j] = s[j], s[i]
    44  }
    45  
// threshold is the maximum distance at which Correct still replaces
// the given word with the closest term
var threshold = 2
    47  
    48  // ////////////////////////////////////////////////////////////////////////////////// //
    49  
    50  // Train trains words by given string slice
    51  func Train(words []string) *Model {
    52  	model := &Model{}
    53  
    54  	if len(words) == 0 {
    55  		return model
    56  	}
    57  
    58  	sm := make(map[string]bool)
    59  
    60  	for _, w := range words {
    61  		sm[w] = true
    62  	}
    63  
    64  	for cw := range sm {
    65  		model.terms = append(model.terms, cw)
    66  	}
    67  
    68  	return model
    69  }
    70  
    71  // Correct corrects given value
    72  func (m *Model) Correct(word string) string {
    73  	if len(m.terms) == 0 {
    74  		return word
    75  	}
    76  
    77  	var result *suggestItem
    78  
    79  	for _, si := range getSuggestSlice(m.terms, word) {
    80  		if result == nil {
    81  			result = si
    82  			continue
    83  		}
    84  
    85  		if si.score < result.score {
    86  			result = si
    87  			continue
    88  		}
    89  	}
    90  
    91  	if result.score > threshold {
    92  		return word
    93  	}
    94  
    95  	return result.term
    96  }
    97  
    98  // Suggest suggests words for given word or word part
    99  func (m *Model) Suggest(word string, max int) []string {
   100  	if len(m.terms) == 0 {
   101  		return []string{word}
   102  	}
   103  
   104  	if max == 1 {
   105  		return []string{m.Correct(word)}
   106  	}
   107  
   108  	sis := getSuggestSlice(m.terms, word)
   109  
   110  	sort.Sort(sis)
   111  
   112  	var result []string
   113  
   114  	for i := 0; i < mathutil.Between(max, 1, len(sis)); i++ {
   115  		result = append(result, sis[i].term)
   116  	}
   117  
   118  	return result
   119  }
   120  
   121  // ////////////////////////////////////////////////////////////////////////////////// //
   122  
   123  // I don't have an idea how we could separate this method
   124  // codebeat:disable[LOC,ABC,CYCLO]
   125  
   126  // Damerau–Levenshtein distance algorithm and code
   127  func getDLDistance(source, target string) int {
   128  	sl := len(source)
   129  	tl := len(target)
   130  
   131  	if sl == 0 {
   132  		if tl == 0 {
   133  			return 0
   134  		}
   135  		return tl
   136  	} else if tl == 0 {
   137  		return sl
   138  	}
   139  
   140  	h := make([][]int, sl+2)
   141  
   142  	for i := range h {
   143  		h[i] = make([]int, tl+2)
   144  	}
   145  
   146  	ll := sl + tl
   147  
   148  	h[0][0] = ll
   149  
   150  	for i := 0; i <= sl; i++ {
   151  		h[i+1][0] = ll
   152  		h[i+1][1] = i
   153  	}
   154  
   155  	for j := 0; j <= tl; j++ {
   156  		h[0][j+1] = ll
   157  		h[1][j+1] = j
   158  	}
   159  
   160  	sd := make(map[rune]int)
   161  
   162  	for _, rn := range source + target {
   163  		sd[rn] = 0
   164  	}
   165  
   166  	for i := 1; i <= sl; i++ {
   167  		d := 0
   168  
   169  		for j := 1; j <= tl; j++ {
   170  			i1 := sd[rune(target[j-1])]
   171  			j1 := d
   172  
   173  			if source[i-1] == target[j-1] {
   174  				h[i+1][j+1] = h[i][j]
   175  				d = j
   176  			} else {
   177  				h[i+1][j+1] = mathutil.Min(h[i][j], mathutil.Min(h[i+1][j], h[i][j+1])) + 1
   178  			}
   179  
   180  			h[i+1][j+1] = mathutil.Min(h[i+1][j+1], h[i1][j1]+(i-i1-1)+1+(j-j1-1))
   181  		}
   182  
   183  		sd[rune(source[i-1])] = i
   184  	}
   185  
   186  	return h[sl+1][tl+1]
   187  }
   188  
   189  // codebeat:enable[LOC,ABC,CYCLO]
   190  
   191  func getSuggestSlice(terms []string, word string) suggestItems {
   192  	var result suggestItems
   193  
   194  	for _, t := range terms {
   195  		result = append(result, &suggestItem{t, getDLDistance(strings.ToLower(t), strings.ToLower(word))})
   196  	}
   197  
   198  	return result
   199  }