// affixType says whether an affix is attached before (PFX) or after (SFX)
// the word.
type affixType int

// The two affix types.
const (
	Prefix affixType = iota
	Suffix
)

// affix is the set of rules declared in an AFF file under one PFX/SFX flag.
type affix struct {
	Rules        []rule    // rule lines, in the order they were read
	Type         affixType // Prefix for PFX, Suffix for SFX
	CrossProduct bool      // "Y" header: prefixes and suffixes may be combined
}

// expand appends to out every variation of word produced by the rules of
// this affix and returns the extended slice.
//
// A rule is skipped when it has a matcher that does not match word. Otherwise
// the rule's Strip text is removed from the start of the word (prefix) or the
// end of the word (suffix) when present there, and AffixText is attached on
// that side. A word that does not start/end with Strip is used unchanged.
func (a affix) expand(word string, out []string) []string {
	for _, r := range a.Rules {
		if r.matcher != nil && !r.matcher.MatchString(word) {
			continue
		}
		if a.Type == Prefix {
			// Hunspell's stripping field applies to prefixes too: the
			// characters are removed from the beginning of the word.
			out = append(out, r.AffixText+strings.TrimPrefix(word, r.Strip))
		} else {
			out = append(out, strings.TrimSuffix(word, r.Strip)+r.AffixText)
		}
	}
	return out
}

// rule is a single PFX/SFX rule line.
type rule struct {
	Strip     string         // text removed from the word before attaching; "" for none
	AffixText string         // suffix or prefix text to add
	Pattern   string         // original matching pattern from AFF file
	matcher   *regexp.Regexp // anchored form of Pattern; nil when Pattern is "."
}

// dictConfig is a partial representation of a Hunspell AFF (Affix) file.
type dictConfig struct {
	IconvReplacements []string          // flattened from/to pairs from ICONV lines
	Replacements      [][2]string       // from/to pairs from REP lines
	CompoundRule      []string          // patterns from COMPOUNDRULE lines
	Flag              string            // FLAG value; "ASCII" unless overridden
	TryChars          string            // TRY value
	WordChars         string            // WORDCHARS value
	CompoundOnly      string            // ONLYINCOMPOUND value
	AffixMap          map[rune]affix    // PFX/SFX definitions keyed by flag
	CamelCase         int               // not set by newDictConfig
	CompoundMin       int64             // COMPOUNDMIN value; defaults to 3
	compoundMap       map[rune][]string // words collected per COMPOUNDRULE flag
	NoSuggestFlag     string            // NOSUGGEST value
}

// expand expands a dictionary entry of the form "word/flags" using the affix
// rules of this configuration. out is truncated and reused as the result
// buffer.
//
// An entry without a slash is returned as the only result. A slash in the
// first or last position is an error. Every flag that appears in a
// COMPOUNDRULE pattern records the word in compoundMap as a side effect. If
// any flag is listed in CompoundOnly, the word contributes no results of its
// own. Flags without an affix definition are ignored.
func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) {
	out = out[:0]
	idx := strings.Index(wordAffix, "/")
	if idx == -1 {
		return append(out, wordAffix), nil
	}
	if idx == 0 || idx+1 == len(wordAffix) {
		return nil, fmt.Errorf("slash char found in first or last position")
	}
	word, keyString := wordAffix[:idx], wordAffix[idx+1:]

	// Record the word under every compound flag it carries, and note whether
	// any flag marks it as usable only inside compounds.
	compoundOnly := false
	for _, key := range keyString {
		if strings.ContainsRune(a.CompoundOnly, key) {
			compoundOnly = true
			continue
		}
		if words, ok := a.compoundMap[key]; ok {
			// The map is shared with the caller's copy of the config, so
			// this write is visible outside the value receiver.
			a.compoundMap[key] = append(words, word)
		}
	}
	if compoundOnly {
		return out, nil
	}

	out = append(out, word)
	prefixes := make([]affix, 0, 5)
	suffixes := make([]affix, 0, 5)
	for _, key := range keyString {
		af, ok := a.AffixMap[key]
		if !ok {
			// TODO: How should we handle this?
			continue
		}
		switch {
		case !af.CrossProduct:
			// Affixes that may not be combined are expanded on their own.
			out = af.expand(word, out)
		case af.Type == Prefix:
			prefixes = append(prefixes, af)
		default:
			suffixes = append(suffixes, af)
		}
	}

	// Suffixes without any prefix.
	for _, suf := range suffixes {
		out = suf.expand(word, out)
	}
	// Each prefix alone, then the cross product with every suffix.
	for _, pre := range prefixes {
		prewords := pre.expand(word, nil)
		out = append(out, prewords...)
		for _, suf := range suffixes {
			for _, w := range prewords {
				out = suf.expand(w, out)
			}
		}
	}
	return out, nil
}

// isCrossProduct parses the cross-product field of a PFX/SFX header line.
// It returns true for "Y", false for "N" and an error for anything else.
func isCrossProduct(val string) (bool, error) {
	switch val {
	case "Y":
		return true, nil
	case "N":
		return false, nil
	}
	return false, fmt.Errorf("CrossProduct is not Y or N: got %q", val)
}

// maxCompoundRuleHint caps the capacity preallocated from a COMPOUNDRULE
// count line, so that a malformed count cannot trigger a huge allocation.
const maxCompoundRuleHint = 1024

// newDictConfig reads a Hunspell AFF file.
//
// Lines are split on whitespace and dispatched on their first field. Blank
// lines and lines starting with an unrecognised word are ignored. The first
// malformed directive aborts parsing: the error is returned and no
// configuration is returned.
func newDictConfig(file io.Reader) (*dictConfig, error) {
	aff := dictConfig{
		Flag:        "ASCII",
		AffixMap:    make(map[rune]affix),
		compoundMap: make(map[rune][]string),
		CompoundMin: 3, // default in Hunspell
	}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		parts := strings.Fields(scanner.Text())
		if len(parts) == 0 {
			continue
		}
		if err := applyDirective(&aff, parts); err != nil {
			return nil, err
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return &aff, nil
}

// applyDirective updates aff from one non-empty, whitespace-split AFF line.
func applyDirective(aff *dictConfig, parts []string) error {
	switch parts[0] {
	case "TRY":
		return setArg(&aff.TryChars, parts)
	case "ONLYINCOMPOUND":
		return setArg(&aff.CompoundOnly, parts)
	case "NOSUGGEST":
		return setArg(&aff.NoSuggestFlag, parts)
	case "WORDCHARS":
		return setArg(&aff.WordChars, parts)
	case "FLAG":
		return setArg(&aff.Flag, parts)
	case "ICONV":
		// A 2-field line is the leading stanza that only provides a count;
		// entries are appended dynamically, so the count is not needed.
		if len(parts) == 2 {
			return nil
		}
		if len(parts) < 3 {
			return fmt.Errorf("ICONV stanza had %d fields, expected 2", len(parts))
		}
		aff.IconvReplacements = append(aff.IconvReplacements, parts[1], parts[2])
	case "REP":
		if len(parts) == 2 {
			return nil
		}
		if len(parts) < 3 {
			return fmt.Errorf("REP stanza had %d fields, expected 2", len(parts))
		}
		aff.Replacements = append(aff.Replacements, [2]string{parts[1], parts[2]})
	case "COMPOUNDMIN":
		if len(parts) < 2 {
			return errMissingArg(parts)
		}
		val, err := strconv.ParseInt(parts[1], 10, 64)
		if err != nil {
			return fmt.Errorf("COMPOUNDMIN stanza had %q expected number", parts[1])
		}
		aff.CompoundMin = val
	case "COMPOUNDRULE":
		if len(parts) < 2 {
			return errMissingArg(parts)
		}
		if n, err := strconv.ParseInt(parts[1], 10, 64); err == nil {
			// A numeric argument is the count stanza. It is only a capacity
			// hint, so clamp it: the value comes straight from the file and
			// a negative or enormous capacity would make the allocation
			// panic.
			if n < 0 {
				n = 0
			} else if n > maxCompoundRuleHint {
				n = maxCompoundRuleHint
			}
			aff.CompoundRule = make([]string, 0, n)
			return nil
		}
		aff.CompoundRule = append(aff.CompoundRule, parts[1])
		// Register every character of the pattern so that expand can
		// recognise it as a compound flag.
		for _, char := range parts[1] {
			if _, ok := aff.compoundMap[char]; !ok {
				aff.compoundMap[char] = []string{}
			}
		}
	case "PFX", "SFX":
		return parseAffixLine(aff, parts)
	default:
		// Do nothing.
		//
		// Hunspell ignores lines that don't start with a known directive.
	}
	return nil
}

// parseAffixLine handles a PFX or SFX line. A line with more than four
// fields is a rule for an already declared flag, a line with exactly four
// fields is the header that declares the flag, and shorter lines are ignored.
func parseAffixLine(aff *dictConfig, parts []string) error {
	switch {
	case len(parts) > 4:
		return addAffixRule(aff, parts)
	case len(parts) > 3:
		cross, err := isCrossProduct(parts[2])
		if err != nil {
			return err
		}
		atype := Prefix
		if parts[0] == "SFX" {
			atype = Suffix
		}
		// this is a new Affix!
		aff.AffixMap[firstRune(parts[1])] = affix{
			Type:         atype,
			CrossProduct: cross,
		}
	}
	return nil
}

// addAffixRule appends the rule described by parts (directive, flag, strip,
// affix text, condition, ...) to the affix previously declared for its flag.
// It fails when the flag has no header yet or the condition does not compile.
func addAffixRule(aff *dictConfig, parts []string) error {
	flag := firstRune(parts[1])
	a, ok := aff.AffixMap[flag]
	if !ok {
		return fmt.Errorf("got rules for flag %q but no definition", flag)
	}

	// "0" in the strip field means there is nothing to strip.
	strip := parts[2]
	if strip == "0" {
		strip = ""
	}

	// See #499: "0" in the affix field is treated as an empty affix.
	//
	// TODO: Is this safe to do in all cases?
	text := parts[3]
	if text == "0" {
		text = ""
	}

	// "." matches every word, so no matcher is needed. Any other condition
	// is anchored to the side of the word the affix attaches to.
	var matcher *regexp.Regexp
	if pat := parts[4]; pat != "." {
		if a.Type == Prefix {
			pat = "^" + pat
		} else {
			pat += "$"
		}
		m, err := regexp.Compile(pat)
		if err != nil {
			return fmt.Errorf("unable to compile %s: %w", pat, err)
		}
		matcher = m
	}

	a.Rules = append(a.Rules, rule{
		Strip:     strip,
		AffixText: text,
		Pattern:   parts[4],
		matcher:   matcher,
	})
	// affix is stored by value, so write the updated copy back.
	aff.AffixMap[flag] = a
	return nil
}

// setArg stores the single argument of a directive line in dst, or returns
// an error (leaving dst untouched) when the argument is missing.
func setArg(dst *string, parts []string) error {
	if len(parts) < 2 {
		return errMissingArg(parts)
	}
	*dst = parts[1]
	return nil
}

// errMissingArg reports a directive line that lacks its required argument.
func errMissingArg(parts []string) error {
	return fmt.Errorf("%s stanza had %d fields, expected 2", parts[0], len(parts))
}

// firstRune returns the first rune of s, or 0 when s is empty. Flags are
// looked up by ranging over the runes of a word's flag string, so they must
// be stored as runes as well; taking only the first byte would make a
// multi-byte flag impossible to match.
func firstRune(s string) rune {
	if rs := []rune(s); len(rs) > 0 {
		return rs[0]
	}
	return 0
}