github.com/errata-ai/vale/v3@v3.4.2/internal/spell/aff.go (about)

     1  package spell
     2  
     3  import (
     4  	"bufio"
     5  	"fmt"
     6  	"io"
     7  	"regexp"
     8  	"strconv"
     9  	"strings"
    10  )
    11  
    12  // affixType is either an affix prefix or suffix
    13  type affixType int
    14  
    15  // specific Affix types
    16  const (
    17  	Prefix affixType = iota
    18  	Suffix
    19  )
    20  
    21  // affix is a rule for affix (adding prefixes or suffixes)
    22  type affix struct {
    23  	Rules        []rule    // -
    24  	Type         affixType // either PFX or SFX
    25  	CrossProduct bool      // -
    26  }
    27  
    28  // expand provides all variations of a given word based on this affix rule
    29  func (a affix) expand(word string, out []string) []string {
    30  	for _, r := range a.Rules {
    31  		if r.matcher != nil && !r.matcher.MatchString(word) {
    32  			continue
    33  		}
    34  		if a.Type == Prefix {
    35  			out = append(out, r.AffixText+word)
    36  			// TODO is does Strip apply to prefixes too?
    37  		} else {
    38  			stripWord := word
    39  			if r.Strip != "" && strings.HasSuffix(word, r.Strip) {
    40  				stripWord = word[:len(word)-len(r.Strip)]
    41  			}
    42  			out = append(out, stripWord+r.AffixText)
    43  		}
    44  	}
    45  	return out
    46  }
    47  
    48  // rule is a Affix rule
    49  type rule struct {
    50  	Strip     string
    51  	AffixText string         // suffix or prefix text to add
    52  	Pattern   string         // original matching pattern from AFF file
    53  	matcher   *regexp.Regexp // matcher to see if this rule applies or not
    54  }
    55  
    56  // dictConfig is a partial representation of a Hunspell AFF (Affix) file.
    57  type dictConfig struct {
    58  	IconvReplacements []string
    59  	Replacements      [][2]string
    60  	CompoundRule      []string
    61  	Flag              string
    62  	TryChars          string
    63  	WordChars         string
    64  	CompoundOnly      string
    65  	AffixMap          map[rune]affix
    66  	CamelCase         int
    67  	CompoundMin       int64
    68  	compoundMap       map[rune][]string
    69  	NoSuggestFlag     string
    70  }
    71  
    72  // expand expands a word/affix using dictionary/affix rules
    73  //
    74  //	This also supports CompoundRule flags
    75  func (a dictConfig) expand(wordAffix string, out []string) ([]string, error) {
    76  	out = out[:0]
    77  	idx := strings.Index(wordAffix, "/")
    78  
    79  	// not found
    80  	if idx == -1 {
    81  		out = append(out, wordAffix)
    82  		return out, nil
    83  	}
    84  	if idx == 0 || idx+1 == len(wordAffix) {
    85  		return nil, fmt.Errorf("slash char found in first or last position")
    86  	}
    87  	// safe
    88  	word, keyString := wordAffix[:idx], wordAffix[idx+1:]
    89  
    90  	// check to see if any of the flags are in the
    91  	// "compound only".  If so then nothing to add
    92  	compoundOnly := false
    93  	for _, key := range keyString {
    94  		if strings.ContainsRune(a.CompoundOnly, key) {
    95  			compoundOnly = true
    96  			continue
    97  		}
    98  		if _, ok := a.compoundMap[key]; !ok {
    99  			// the isn't a compound flag
   100  			continue
   101  		}
   102  		// is a compound flag
   103  		a.compoundMap[key] = append(a.compoundMap[key], word)
   104  	}
   105  
   106  	if compoundOnly {
   107  		return out, nil
   108  	}
   109  
   110  	out = append(out, word)
   111  	prefixes := make([]affix, 0, 5)
   112  	suffixes := make([]affix, 0, 5)
   113  	for _, key := range keyString {
   114  		// want keyString to []?something?
   115  		// then iterate over that
   116  		af, ok := a.AffixMap[key]
   117  		if !ok {
   118  			// TODO: How should we handle this?
   119  			continue
   120  		}
   121  		if !af.CrossProduct {
   122  			out = af.expand(word, out)
   123  			continue
   124  		}
   125  		if af.Type == Prefix {
   126  			prefixes = append(prefixes, af)
   127  		} else {
   128  			suffixes = append(suffixes, af)
   129  		}
   130  	}
   131  
   132  	// expand all suffixes with out any prefixes
   133  	for _, suf := range suffixes {
   134  		out = suf.expand(word, out)
   135  	}
   136  	for _, pre := range prefixes {
   137  		prewords := pre.expand(word, nil)
   138  		out = append(out, prewords...)
   139  
   140  		// now do cross product
   141  		for _, suf := range suffixes {
   142  			for _, w := range prewords {
   143  				out = suf.expand(w, out)
   144  			}
   145  		}
   146  	}
   147  	return out, nil
   148  }
   149  
   150  func isCrossProduct(val string) (bool, error) {
   151  	switch val {
   152  	case "Y":
   153  		return true, nil
   154  	case "N":
   155  		return false, nil
   156  	}
   157  	return false, fmt.Errorf("CrossProduct is not Y or N: got %q", val)
   158  }
   159  
   160  // newDictConfig reads an Hunspell AFF file
   161  func newDictConfig(file io.Reader) (*dictConfig, error) { //nolint:funlen
   162  	aff := dictConfig{
   163  		Flag:        "ASCII",
   164  		AffixMap:    make(map[rune]affix),
   165  		compoundMap: make(map[rune][]string),
   166  		CompoundMin: 3, // default in Hunspell
   167  	}
   168  	scanner := bufio.NewScanner(file)
   169  	for scanner.Scan() {
   170  		line := scanner.Text()
   171  
   172  		parts := strings.Fields(line)
   173  		if len(parts) == 0 {
   174  			continue
   175  		}
   176  
   177  		switch parts[0] {
   178  		case "TRY":
   179  			if len(parts) < 2 {
   180  				return nil, fmt.Errorf("TRY stanza had %d fields, expected 2", len(parts))
   181  			}
   182  			aff.TryChars = parts[1]
   183  		case "ICONV":
   184  			// if only 2 fields, then its the first stanza that just provides a count
   185  			//  we don't care, as we dynamically allocate
   186  			if len(parts) == 2 {
   187  				continue
   188  			} else if len(parts) < 3 {
   189  				return nil, fmt.Errorf("ICONV stanza had %d fields, expected 2", len(parts))
   190  			}
   191  			aff.IconvReplacements = append(aff.IconvReplacements, parts[1], parts[2])
   192  		case "REP":
   193  			if len(parts) == 2 {
   194  				continue
   195  			} else if len(parts) < 3 {
   196  				return nil, fmt.Errorf("REP stanza had %d fields, expected 2", len(parts))
   197  			}
   198  			aff.Replacements = append(aff.Replacements, [2]string{parts[1], parts[2]})
   199  		case "COMPOUNDMIN":
   200  			if len(parts) < 2 {
   201  				return nil, fmt.Errorf("COMPOUNDMIN stanza had %d fields, expected 2", len(parts))
   202  			}
   203  			val, err := strconv.ParseInt(parts[1], 10, 64)
   204  			if err != nil {
   205  				return nil, fmt.Errorf("COMPOUNDMIN stanza had %q expected number", parts[1])
   206  			}
   207  			aff.CompoundMin = val
   208  		case "ONLYINCOMPOUND":
   209  			if len(parts) < 2 {
   210  				return nil, fmt.Errorf("ONLYINCOMPOUND stanza had %d fields, expected 2", len(parts))
   211  			}
   212  			aff.CompoundOnly = parts[1]
   213  		case "COMPOUNDRULE":
   214  			if len(parts) < 2 {
   215  				return nil, fmt.Errorf("COMPOUNDRULE stanza had %d fields, expected 2", len(parts))
   216  			}
   217  			val, err := strconv.ParseInt(parts[1], 10, 64)
   218  			if err == nil {
   219  				aff.CompoundRule = make([]string, 0, val)
   220  			} else {
   221  				aff.CompoundRule = append(aff.CompoundRule, parts[1])
   222  				for _, char := range parts[1] {
   223  					if _, ok := aff.compoundMap[char]; !ok {
   224  						aff.compoundMap[char] = []string{}
   225  					}
   226  				}
   227  			}
   228  		case "NOSUGGEST":
   229  			if len(parts) < 2 {
   230  				return nil, fmt.Errorf("NOSUGGEST stanza had %d fields, expected 2", len(parts))
   231  			}
   232  			aff.NoSuggestFlag = parts[1]
   233  		case "WORDCHARS":
   234  			if len(parts) < 2 {
   235  				return nil, fmt.Errorf("WORDCHAR stanza had %d fields, expected 2", len(parts))
   236  			}
   237  			aff.WordChars = parts[1]
   238  		case "FLAG":
   239  			if len(parts) < 2 {
   240  				return nil, fmt.Errorf("FLAG stanza had %d, expected 1", len(parts))
   241  			}
   242  			aff.Flag = parts[1]
   243  		case "PFX", "SFX":
   244  			atype := Prefix
   245  			if parts[0] == "SFX" {
   246  				atype = Suffix
   247  			}
   248  
   249  			sections := len(parts)
   250  			if sections > 4 {
   251  				// does this need to be split out into suffix and prefix?
   252  				flag := rune(parts[1][0])
   253  				a, ok := aff.AffixMap[flag]
   254  				if !ok {
   255  					return nil, fmt.Errorf("got rules for flag %q but no definition", flag)
   256  				}
   257  
   258  				strip := ""
   259  				if parts[2] != "0" {
   260  					strip = parts[2]
   261  				}
   262  
   263  				var matcher *regexp.Regexp
   264  				var err error
   265  				pat := parts[4]
   266  				if pat != "." {
   267  					if a.Type == Prefix {
   268  						pat = "^" + pat
   269  					} else {
   270  						pat += "$"
   271  					}
   272  					matcher, err = regexp.Compile(pat)
   273  					if err != nil {
   274  						return nil, fmt.Errorf("unable to compile %s", pat)
   275  					}
   276  				}
   277  
   278  				// See #499.
   279  				//
   280  				// TODO: Is this safe to do in all cases?
   281  				if parts[3] == "0" {
   282  					parts[3] = ""
   283  				}
   284  
   285  				a.Rules = append(a.Rules, rule{
   286  					Strip:     strip,
   287  					AffixText: parts[3],
   288  					Pattern:   parts[4],
   289  					matcher:   matcher,
   290  				})
   291  				aff.AffixMap[flag] = a
   292  			} else if sections > 3 {
   293  				cross, err := isCrossProduct(parts[2])
   294  				if err != nil {
   295  					return nil, err
   296  				}
   297  				// this is a new Affix!
   298  				a := affix{
   299  					Type:         atype,
   300  					CrossProduct: cross,
   301  				}
   302  				flag := rune(parts[1][0])
   303  				aff.AffixMap[flag] = a
   304  			}
   305  		default:
   306  			// Do nothing.
   307  			//
   308  			// Hunspell ignores lines that don't start with a known directive.
   309  		}
   310  	}
   311  
   312  	if err := scanner.Err(); err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	return &aff, nil
   317  }