github.com/errata-ai/vale/v3@v3.4.2/internal/check/substitution.go (about)

     1  package check
     2  
     3  import (
     4  	"fmt"
     5  	"sort"
     6  	"strings"
     7  
     8  	"github.com/errata-ai/regexp2"
     9  	"golang.org/x/exp/maps"
    10  
    11  	"github.com/errata-ai/vale/v3/internal/core"
    12  	"github.com/errata-ai/vale/v3/internal/nlp"
    13  )
    14  
    15  // Substitution switches the values of Swap for its keys.
    16  type Substitution struct {
    17  	Definition `mapstructure:",squash"`
    18  	Exceptions []string
    19  	repl       []string
    20  	Swap       map[string]string
    21  	exceptRe   *regexp2.Regexp
    22  	pattern    *regexp2.Regexp
    23  	Ignorecase bool
    24  	Nonword    bool
    25  	Vocab      bool
    26  	Capitalize bool
    27  
    28  	// Deprecated
    29  	POS string
    30  }
    31  
    32  // NewSubstitution creates a new `substitution`-based rule.
    33  func NewSubstitution(cfg *core.Config, generic baseCheck, path string) (Substitution, error) {
    34  	rule := Substitution{Vocab: true}
    35  
    36  	err := decodeRule(generic, &rule)
    37  	if err != nil {
    38  		return rule, readStructureError(err, path)
    39  	}
    40  
    41  	err = checkScopes(rule.Scope, path)
    42  	if err != nil {
    43  		return rule, err
    44  	}
    45  	tokens := ""
    46  
    47  	re, err := updateExceptions(rule.Exceptions, cfg.AcceptedTokens, rule.Vocab)
    48  	if err != nil {
    49  		return rule, core.NewE201FromPosition(err.Error(), path, 1)
    50  	}
    51  	rule.exceptRe = re
    52  
    53  	regex := makeRegexp(
    54  		cfg.WordTemplate,
    55  		rule.Ignorecase,
    56  		func() bool { return !rule.Nonword },
    57  		func() string { return "" }, true)
    58  
    59  	terms := maps.Keys(rule.Swap)
    60  	sort.Slice(terms, func(p, q int) bool {
    61  		return len(terms[p]) > len(terms[q])
    62  	})
    63  
    64  	replacements := []string{}
    65  	for _, regexstr := range terms {
    66  		replacement := rule.Swap[regexstr]
    67  
    68  		opens := strings.Count(regexstr, "(")
    69  		if opens != strings.Count(regexstr, "(?")+strings.Count(regexstr, `\(`) {
    70  			// We rely on manually-added capture groups to associate a match
    71  			// with its replacement -- e.g.,
    72  			//
    73  			//    `(foo)|(bar)`, [replacement1, replacement2]
    74  			//
    75  			// where the first capture group ("foo") corresponds to the first
    76  			// element of the replacements slice ("replacement1"). This means
    77  			// that we can only accept non-capture groups from the user (the
    78  			// indexing would be mixed up otherwise).
    79  			//
    80  			// TODO: Should we change this? Perhaps by creating a map of regex
    81  			// to replacements?
    82  			return rule, core.NewE201FromTarget(
    83  				"capture group not supported; use '(?:' instead of '('", regexstr, path)
    84  		}
    85  		tokens += `(` + regexstr + `)|`
    86  		replacements = append(replacements, replacement)
    87  	}
    88  	regex = fmt.Sprintf(regex, strings.TrimRight(tokens, "|"))
    89  
    90  	re, err = regexp2.CompileStd(regex)
    91  	if err != nil {
    92  		return rule, core.NewE201FromPosition(err.Error(), path, 1)
    93  	}
    94  
    95  	rule.pattern = re
    96  	rule.repl = replacements
    97  	return rule, nil
    98  }
    99  
   100  // Run executes the the `substitution`-based rule.
   101  //
   102  // The rule looks for one pattern and then suggests a replacement.
   103  func (s Substitution) Run(blk nlp.Block, _ *core.File) ([]core.Alert, error) {
   104  	var alerts []core.Alert
   105  
   106  	txt := blk.Text
   107  	// Leave early if we can to avoid calling `FindAllStringSubmatchIndex`
   108  	// unnecessarily.
   109  	if !s.pattern.MatchStringStd(txt) {
   110  		return alerts, nil
   111  	}
   112  
   113  	for _, submat := range s.pattern.FindAllStringSubmatchIndex(txt, -1) {
   114  		for idx, mat := range submat {
   115  			if mat != -1 && idx > 0 && idx%2 == 0 {
   116  				loc := []int{mat, submat[idx+1]}
   117  
   118  				converted, err := re2Loc(txt, loc)
   119  				if err != nil {
   120  					return alerts, err
   121  				}
   122  
   123  				// Based on the current capture group (`idx`), we can determine
   124  				// the associated replacement string by using the `repl` slice:
   125  				expected := s.repl[(idx/2)-1]
   126  				observed := strings.TrimSpace(converted)
   127  
   128  				same := matchToken(expected, observed, s.Ignorecase)
   129  				if !same && !isMatch(s.exceptRe, observed) {
   130  					action := s.Fields().Action
   131  					if action.Name == "replace" && len(action.Params) == 0 {
   132  						action.Params = strings.Split(expected, "|")
   133  
   134  						if s.Capitalize && observed == core.CapFirst(observed) {
   135  							cased := []string{}
   136  							for _, param := range action.Params {
   137  								cased = append(cased, core.CapFirst(param))
   138  							}
   139  							action.Params = cased
   140  						}
   141  
   142  						expected = core.ToSentence(action.Params, "or")
   143  						// NOTE: For backwards-compatibility, we need to ensure
   144  						// that we don't double quote.
   145  						s.Message = convertMessage(s.Message)
   146  					}
   147  
   148  					a, aerr := makeAlert(s.Definition, loc, txt)
   149  					if aerr != nil {
   150  						return alerts, aerr
   151  					}
   152  
   153  					a.Message, a.Description = formatMessages(s.Message,
   154  						s.Description, expected, observed)
   155  					a.Action = action
   156  
   157  					alerts = append(alerts, a)
   158  				}
   159  			}
   160  		}
   161  	}
   162  
   163  	return alerts, nil
   164  }
   165  
   166  // Fields provides access to the internal rule definition.
   167  func (s Substitution) Fields() Definition {
   168  	return s.Definition
   169  }
   170  
   171  // Pattern is the internal regex pattern used by this rule.
   172  func (s Substitution) Pattern() string {
   173  	return s.pattern.String()
   174  }
   175  
   176  func convertMessage(s string) string {
   177  	for _, spec := range []string{"'%s'", "\"%s\""} {
   178  		if strings.Count(s, spec) == 2 {
   179  			s = strings.Replace(s, spec, "%s", 1)
   180  		}
   181  	}
   182  	return s
   183  }