pkg.re/essentialkaos/ek.10@v12.41.0+incompatible/spellcheck/spellcheck.go (about) 1 // Package spellcheck provides spellcheck based on Damerau–Levenshtein distance algorithm 2 package spellcheck 3 4 // ////////////////////////////////////////////////////////////////////////////////// // 5 // // 6 // Copyright (c) 2022 ESSENTIAL KAOS // 7 // Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> // 8 // // 9 // ////////////////////////////////////////////////////////////////////////////////// // 10 11 import ( 12 "sort" 13 "strings" 14 15 "pkg.re/essentialkaos/ek.v12/mathutil" 16 ) 17 18 // ////////////////////////////////////////////////////////////////////////////////// // 19 20 // Model is spellcheck model struct 21 type Model struct { 22 terms []string 23 } 24 25 // ////////////////////////////////////////////////////////////////////////////////// // 26 27 type suggestItem struct { 28 term string 29 score int 30 } 31 32 type suggestItems []*suggestItem 33 34 func (s suggestItems) Len() int { 35 return len(s) 36 } 37 38 func (s suggestItems) Less(i, j int) bool { 39 return s[i].score < s[j].score 40 } 41 42 func (s suggestItems) Swap(i, j int) { 43 s[i], s[j] = s[j], s[i] 44 } 45 46 var threshold = 2 47 48 // ////////////////////////////////////////////////////////////////////////////////// // 49 50 // Train trains words by given string slice 51 func Train(words []string) *Model { 52 model := &Model{} 53 54 if len(words) == 0 { 55 return model 56 } 57 58 sm := make(map[string]bool) 59 60 for _, w := range words { 61 sm[w] = true 62 } 63 64 for cw := range sm { 65 model.terms = append(model.terms, cw) 66 } 67 68 return model 69 } 70 71 // Correct corrects given value 72 func (m *Model) Correct(word string) string { 73 if len(m.terms) == 0 { 74 return word 75 } 76 77 var result *suggestItem 78 79 for _, si := range getSuggestSlice(m.terms, word) { 80 if result == nil { 81 result = si 82 continue 83 } 84 85 if si.score < result.score { 86 result = si 87 continue 88 } 89 } 90 91 if result.score > threshold { 92 return word 93 } 94 95 return result.term 96 } 97 98 // Suggest suggests words for given word or word part 99 func (m *Model) Suggest(word string, max int) []string { 100 if len(m.terms) == 0 { 101 return []string{word} 102 } 103 104 if max == 1 { 105 return []string{m.Correct(word)} 106 } 107 108 sis := getSuggestSlice(m.terms, word) 109 110 sort.Sort(sis) 111 112 var result []string 113 114 for i := 0; i < mathutil.Between(max, 1, len(sis)); i++ { 115 result = append(result, sis[i].term) 116 } 117 118 return result 119 } 120 121 // ////////////////////////////////////////////////////////////////////////////////// // 122 123 // I don't have an idea how we could separate this method 124 // codebeat:disable[LOC,ABC,CYCLO] 125 126 // Damerau–Levenshtein distance algorithm and code 127 func getDLDistance(source, target string) int { 128 sl := len(source) 129 tl := len(target) 130 131 if sl == 0 { 132 if tl == 0 { 133 return 0 134 } 135 return tl 136 } else if tl == 0 { 137 return sl 138 } 139 140 h := make([][]int, sl+2) 141 142 for i := range h { 143 h[i] = make([]int, tl+2) 144 } 145 146 ll := sl + tl 147 148 h[0][0] = ll 149 150 for i := 0; i <= sl; i++ { 151 h[i+1][0] = ll 152 h[i+1][1] = i 153 } 154 155 for j := 0; j <= tl; j++ { 156 h[0][j+1] = ll 157 h[1][j+1] = j 158 } 159 160 sd := make(map[rune]int) 161 162 for _, rn := range source + target { 163 sd[rn] = 0 164 } 165 166 for i := 1; i <= sl; i++ { 167 d := 0 168 169 for j := 1; j <= tl; j++ { 170 i1 := sd[rune(target[j-1])] 171 j1 := d 172 173 if source[i-1] == target[j-1] { 174 h[i+1][j+1] = h[i][j] 175 d = j 176 } else { 177 h[i+1][j+1] = mathutil.Min(h[i][j], mathutil.Min(h[i+1][j], h[i][j+1])) + 1 178 } 179 180 h[i+1][j+1] = mathutil.Min(h[i+1][j+1], h[i1][j1]+(i-i1-1)+1+(j-j1-1)) 181 } 182 183 sd[rune(source[i-1])] = i 184 } 185 186 return h[sl+1][tl+1] 187 } 188 189 // codebeat:enable[LOC,ABC,CYCLO] 190 191 func getSuggestSlice(terms []string, word string) suggestItems { 192 var result suggestItems 193 194 for _, t := range terms { 195 result = append(result, &suggestItem{t, getDLDistance(strings.ToLower(t), strings.ToLower(word))}) 196 } 197 198 return result 199 }