github.com/errata-ai/vale/v3@v3.4.2/internal/check/substitution.go (about) 1 package check 2 3 import ( 4 "fmt" 5 "sort" 6 "strings" 7 8 "github.com/errata-ai/regexp2" 9 "golang.org/x/exp/maps" 10 11 "github.com/errata-ai/vale/v3/internal/core" 12 "github.com/errata-ai/vale/v3/internal/nlp" 13 ) 14 15 // Substitution switches the values of Swap for its keys. 16 type Substitution struct { 17 Definition `mapstructure:",squash"` 18 Exceptions []string 19 repl []string 20 Swap map[string]string 21 exceptRe *regexp2.Regexp 22 pattern *regexp2.Regexp 23 Ignorecase bool 24 Nonword bool 25 Vocab bool 26 Capitalize bool 27 28 // Deprecated 29 POS string 30 } 31 32 // NewSubstitution creates a new `substitution`-based rule. 33 func NewSubstitution(cfg *core.Config, generic baseCheck, path string) (Substitution, error) { 34 rule := Substitution{Vocab: true} 35 36 err := decodeRule(generic, &rule) 37 if err != nil { 38 return rule, readStructureError(err, path) 39 } 40 41 err = checkScopes(rule.Scope, path) 42 if err != nil { 43 return rule, err 44 } 45 tokens := "" 46 47 re, err := updateExceptions(rule.Exceptions, cfg.AcceptedTokens, rule.Vocab) 48 if err != nil { 49 return rule, core.NewE201FromPosition(err.Error(), path, 1) 50 } 51 rule.exceptRe = re 52 53 regex := makeRegexp( 54 cfg.WordTemplate, 55 rule.Ignorecase, 56 func() bool { return !rule.Nonword }, 57 func() string { return "" }, true) 58 59 terms := maps.Keys(rule.Swap) 60 sort.Slice(terms, func(p, q int) bool { 61 return len(terms[p]) > len(terms[q]) 62 }) 63 64 replacements := []string{} 65 for _, regexstr := range terms { 66 replacement := rule.Swap[regexstr] 67 68 opens := strings.Count(regexstr, "(") 69 if opens != strings.Count(regexstr, "(?")+strings.Count(regexstr, `\(`) { 70 // We rely on manually-added capture groups to associate a match 71 // with its replacement -- e.g., 72 // 73 // `(foo)|(bar)`, [replacement1, replacement2] 74 // 75 // where the first capture group ("foo") corresponds to the first 76 // element of the replacements slice ("replacement1"). This means 77 // that we can only accept non-capture groups from the user (the 78 // indexing would be mixed up otherwise). 79 // 80 // TODO: Should we change this? Perhaps by creating a map of regex 81 // to replacements? 82 return rule, core.NewE201FromTarget( 83 "capture group not supported; use '(?:' instead of '('", regexstr, path) 84 } 85 tokens += `(` + regexstr + `)|` 86 replacements = append(replacements, replacement) 87 } 88 regex = fmt.Sprintf(regex, strings.TrimRight(tokens, "|")) 89 90 re, err = regexp2.CompileStd(regex) 91 if err != nil { 92 return rule, core.NewE201FromPosition(err.Error(), path, 1) 93 } 94 95 rule.pattern = re 96 rule.repl = replacements 97 return rule, nil 98 } 99 100 // Run executes the the `substitution`-based rule. 101 // 102 // The rule looks for one pattern and then suggests a replacement. 103 func (s Substitution) Run(blk nlp.Block, _ *core.File) ([]core.Alert, error) { 104 var alerts []core.Alert 105 106 txt := blk.Text 107 // Leave early if we can to avoid calling `FindAllStringSubmatchIndex` 108 // unnecessarily. 109 if !s.pattern.MatchStringStd(txt) { 110 return alerts, nil 111 } 112 113 for _, submat := range s.pattern.FindAllStringSubmatchIndex(txt, -1) { 114 for idx, mat := range submat { 115 if mat != -1 && idx > 0 && idx%2 == 0 { 116 loc := []int{mat, submat[idx+1]} 117 118 converted, err := re2Loc(txt, loc) 119 if err != nil { 120 return alerts, err 121 } 122 123 // Based on the current capture group (`idx`), we can determine 124 // the associated replacement string by using the `repl` slice: 125 expected := s.repl[(idx/2)-1] 126 observed := strings.TrimSpace(converted) 127 128 same := matchToken(expected, observed, s.Ignorecase) 129 if !same && !isMatch(s.exceptRe, observed) { 130 action := s.Fields().Action 131 if action.Name == "replace" && len(action.Params) == 0 { 132 action.Params = strings.Split(expected, "|") 133 134 if s.Capitalize && observed == core.CapFirst(observed) { 135 cased := []string{} 136 for _, param := range action.Params { 137 cased = append(cased, core.CapFirst(param)) 138 } 139 action.Params = cased 140 } 141 142 expected = core.ToSentence(action.Params, "or") 143 // NOTE: For backwards-compatibility, we need to ensure 144 // that we don't double quote. 145 s.Message = convertMessage(s.Message) 146 } 147 148 a, aerr := makeAlert(s.Definition, loc, txt) 149 if aerr != nil { 150 return alerts, aerr 151 } 152 153 a.Message, a.Description = formatMessages(s.Message, 154 s.Description, expected, observed) 155 a.Action = action 156 157 alerts = append(alerts, a) 158 } 159 } 160 } 161 } 162 163 return alerts, nil 164 } 165 166 // Fields provides access to the internal rule definition. 167 func (s Substitution) Fields() Definition { 168 return s.Definition 169 } 170 171 // Pattern is the internal regex pattern used by this rule. 172 func (s Substitution) Pattern() string { 173 return s.pattern.String() 174 } 175 176 func convertMessage(s string) string { 177 for _, spec := range []string{"'%s'", "\"%s\""} { 178 if strings.Count(s, spec) == 2 { 179 s = strings.Replace(s, spec, "%s", 1) 180 } 181 } 182 return s 183 }