github.com/errata-ai/vale/v3@v3.4.2/internal/check/definition.go (about) 1 package check 2 3 import ( 4 "fmt" 5 "regexp" 6 "sort" 7 "strconv" 8 "strings" 9 10 "github.com/errata-ai/regexp2" 11 "github.com/mitchellh/mapstructure" 12 "gopkg.in/yaml.v2" 13 14 "github.com/errata-ai/vale/v3/internal/core" 15 "github.com/errata-ai/vale/v3/internal/nlp" 16 ) 17 18 var inlineScopes = []string{"code", "link", "strong", "emphasis"} 19 20 // FilterEnv is the environment passed to the `--filter` flag. 21 type FilterEnv struct { 22 Rules []Definition 23 } 24 25 // Rule represents in individual writing construct to enforce. 26 type Rule interface { 27 Run(blk nlp.Block, file *core.File) ([]core.Alert, error) 28 Fields() Definition 29 Pattern() string 30 } 31 32 // Definition holds the common attributes of rule definitions. 33 type Definition struct { 34 Action core.Action 35 Description string 36 Extends string 37 Level string 38 Limit int 39 Link string 40 Message string 41 Name string 42 Scope []string 43 Selector Selector 44 } 45 46 var defaultStyles = []string{"Vale"} 47 var extensionPoints = []string{ 48 "capitalization", 49 "conditional", 50 "consistency", 51 "existence", 52 "occurrence", 53 "repetition", 54 "substitution", 55 "readability", 56 "spelling", 57 "sequence", 58 "metric", 59 "script", 60 } 61 var defaultRules = map[string]map[string]interface{}{ 62 "Avoid": { 63 "extends": "existence", 64 "name": "Vale.Avoid", 65 "level": "error", 66 "message": "Avoid using '%s'.", 67 "scope": "text", 68 "ignorecase": false, 69 "tokens": []string{}, 70 "path": "internal", 71 }, 72 "Terms": { 73 "extends": "substitution", 74 "name": "Vale.Terms", 75 "level": "error", 76 "message": "Use '%s' instead of '%s'.", 77 "scope": "text", 78 "ignorecase": true, 79 "swap": map[string]string{}, 80 "vocab": false, 81 "path": "internal", 82 }, 83 "Repetition": { 84 "extends": "repetition", 85 "name": "Vale.Repetition", 86 "level": "error", 87 "message": "'%s' is repeated!", 88 "scope": "text", 89 "alpha": true, 90 "action": core.Action{ 91 Name: "edit", 92 Params: []string{"truncate", " "}, 93 }, 94 "tokens": []string{`[^\s]+`}, 95 "path": "internal", 96 }, 97 "Spelling": { 98 "extends": "spelling", 99 "name": "Vale.Spelling", 100 "message": "Did you really mean '%s'?", 101 "level": "error", 102 "scope": "text", 103 "action": core.Action{ 104 Name: "suggest", 105 Params: []string{"spellings"}, 106 }, 107 "ignore": []interface{}{}, 108 "path": "internal", 109 }, 110 } 111 112 const ( 113 ignoreCase = `(?i)` 114 wordTemplate = `(?m)\b(?:%s)\b` 115 nonwordTemplate = `(?m)(?:%s)` 116 tokenTemplate = `^(?:%s)$` //nolint:gosec 117 ) 118 119 type baseCheck map[string]interface{} 120 121 func buildRule(cfg *core.Config, generic baseCheck) (Rule, error) { 122 path, ok := generic["path"].(string) 123 if !ok { 124 msg := fmt.Errorf("'%v' is not valid", generic) 125 return Existence{}, core.NewE100("buildRule: path", msg) 126 } 127 128 name, ok := generic["extends"].(string) 129 if !ok { 130 name = "unknown" 131 } 132 133 delete(generic, "path") 134 switch name { 135 case "existence": 136 return NewExistence(cfg, generic, path) 137 case "substitution": 138 return NewSubstitution(cfg, generic, path) 139 case "capitalization": 140 return NewCapitalization(cfg, generic, path) 141 case "occurrence": 142 return NewOccurrence(cfg, generic, path) 143 case "spelling": 144 return NewSpelling(cfg, generic, path) 145 case "repetition": 146 return NewRepetition(cfg, generic, path) 147 case "readability": 148 return NewReadability(cfg, generic, path) 149 case "conditional": 150 return NewConditional(cfg, generic, path) 151 case "consistency": 152 return NewConsistency(cfg, generic, path) 153 case "sequence": 154 return NewSequence(cfg, generic, path) 155 case "metric": 156 return NewMetric(cfg, generic, path) 157 case "script": 158 return NewScript(cfg, generic, path) 159 default: 160 return Existence{}, core.NewE201FromTarget( 161 fmt.Sprintf("'extends' key must be one of %v.", extensionPoints), 162 name, 163 path) 164 } 165 } 166 167 func formatMessages(msg string, desc string, subs ...string) (string, string) { 168 return core.FormatMessage(msg, subs...), core.FormatMessage(desc, subs...) 169 } 170 171 // NOTE: We need to do this because regexp2, the library we use for extended 172 // syntax, returns its locatons in *rune* offsets. 173 func re2Loc(s string, loc []int) (string, error) { 174 converted := []rune(s) 175 176 size := len(converted) 177 if loc[0] < 0 || loc[1] > size { 178 msg := fmt.Errorf("%d (%d:%d)", size, loc[0], loc[1]) 179 return "", core.NewE100("re2loc: bounds", msg) 180 } 181 182 return string(converted[loc[0]:loc[1]]), nil 183 } 184 185 func makeAlert(chk Definition, loc []int, txt string) (core.Alert, error) { 186 match, err := re2Loc(txt, loc) 187 if err != nil { 188 return core.Alert{}, err 189 } 190 191 a := core.Alert{ 192 Check: chk.Name, Severity: chk.Level, Span: loc, Link: chk.Link, 193 Match: match, Action: chk.Action} 194 a.Message, a.Description = formatMessages(chk.Message, chk.Description, match) 195 196 return a, nil 197 } 198 199 func parse(file []byte, path string) (map[string]interface{}, error) { 200 generic := map[string]interface{}{} 201 202 if err := yaml.Unmarshal(file, &generic); err != nil { 203 r := regexp.MustCompile(`yaml: line (\d+): (.+)`) 204 if r.MatchString(err.Error()) { 205 groups := r.FindStringSubmatch(err.Error()) 206 i, erri := strconv.Atoi(groups[1]) 207 if erri != nil { 208 return generic, core.NewE100("addCheck/Atoi", erri) 209 } 210 return generic, core.NewE201FromPosition(groups[2], path, i) 211 } 212 } else if err = validateDefinition(generic, path); err != nil { 213 return generic, err 214 } 215 216 return generic, nil 217 } 218 219 func validateDefinition(generic map[string]interface{}, path string) error { 220 if point, ok := generic["extends"]; !ok || point == nil { 221 return core.NewE201FromPosition( 222 "Missing the required 'extends' key.", 223 path, 224 1) 225 } else if !core.StringInSlice(point.(string), extensionPoints) { 226 key, _ := point.(string) 227 return core.NewE201FromTarget( 228 fmt.Sprintf("'extends' key must be one of %v.", extensionPoints), 229 key, 230 path) 231 } 232 233 if _, ok := generic["message"]; !ok { 234 return core.NewE201FromPosition( 235 "Missing the required 'message' key.", 236 path, 237 1) 238 } 239 240 if level, ok := generic["level"]; ok { 241 if level == nil || !core.StringInSlice(level.(string), core.AlertLevels) { 242 return core.NewE201FromTarget( 243 fmt.Sprintf("'level' must be one of %v", core.AlertLevels), 244 "level", 245 path) 246 } 247 } 248 249 if generic["code"] != nil && generic["code"].(bool) { 250 return core.NewE201FromTarget( 251 "`code` is deprecated; please use `scope: raw` instead.", 252 "code", 253 path) 254 } 255 256 return nil 257 } 258 259 func readStructureError(err error, path string) error { 260 r1 := regexp.MustCompile(`\* '(.+)' (.+)`) 261 r2 := regexp.MustCompile(`\* '(?:.*)' (.*): (\w+)`) 262 if r1.MatchString(err.Error()) { 263 groups := r1.FindStringSubmatch(err.Error()) 264 return core.NewE201FromTarget( 265 groups[2], 266 strings.ToLower(groups[1]), 267 path) 268 } else if r2.MatchString(err.Error()) { 269 groups := r2.FindStringSubmatch(err.Error()) 270 return core.NewE201FromTarget( 271 fmt.Sprintf("%s: '%s'", groups[1], groups[2]), 272 strings.ToLower(groups[2]), 273 path) 274 } 275 return core.NewE201FromPosition(err.Error(), path, 1) 276 } 277 278 func makeRegexp( 279 template string, 280 noCase bool, 281 word func() bool, 282 callback func() string, 283 shouldAppend bool, 284 ) string { 285 regex := "" 286 287 if word() { 288 if template != "" { 289 regex += template 290 } else { 291 regex += wordTemplate 292 } 293 } else { 294 regex += nonwordTemplate 295 } 296 297 if shouldAppend { 298 regex += callback() 299 } else { 300 regex = callback() + regex 301 } 302 303 if noCase { 304 regex = ignoreCase + regex 305 } 306 307 return regex 308 } 309 310 func matchToken(expected, observed string, ignorecase bool) bool { 311 p := expected 312 if ignorecase { 313 p = ignoreCase + p 314 } 315 316 r, err := regexp2.CompileStd(fmt.Sprintf(tokenTemplate, p)) 317 if core.IsPhrase(expected) || err != nil { 318 return expected == observed 319 } 320 return r.MatchStringStd(observed) 321 } 322 323 func updateExceptions(previous []string, current []string, vocab bool) (*regexp2.Regexp, error) { 324 if vocab { 325 previous = append(previous, current...) 326 } 327 328 // NOTE: This is required to ensure that we have greedy alternation. 329 sort.Slice(previous, func(p, q int) bool { 330 return len(previous[p]) > len(previous[q]) 331 }) 332 333 // NOTE: We need to add `(?-i)` to each term that doesn't already have it, 334 // otherwise any instance of the `(?i)` flag will be set for the entire 335 // expression. 336 for i, term := range previous { 337 if !strings.HasPrefix(term, "(?i)") { 338 previous[i] = fmt.Sprintf("(?-i)%s", term) 339 } 340 } 341 342 regex := makeRegexp( 343 "", 344 false, 345 func() bool { return true }, 346 func() string { return "" }, 347 true) 348 349 regex = fmt.Sprintf(regex, strings.Join(previous, "|")) 350 if len(previous) > 0 { 351 return regexp2.CompileStd(regex) 352 } 353 354 return ®exp2.Regexp{}, nil 355 } 356 357 func decodeRule(input interface{}, output interface{}) error { 358 config := mapstructure.DecoderConfig{ 359 ErrorUnused: true, 360 Squash: true, 361 WeaklyTypedInput: true, 362 Result: output, 363 } 364 365 decoder, err := mapstructure.NewDecoder(&config) 366 if err != nil { 367 return err 368 } 369 370 return decoder.Decode(input) 371 } 372 373 func checkScopes(scopes []string, path string) error { 374 for _, scope := range scopes { 375 if strings.Contains(scope, "&") { 376 // FIXME: multi part ... 377 continue 378 } 379 380 // Negation ... 381 scope = strings.TrimPrefix(scope, "~") 382 383 // Specification ... 384 // 385 // TODO: check sub-scopes too? 386 scope = strings.Split(scope, ".")[0] 387 388 if core.StringInSlice(scope, inlineScopes) { 389 return core.NewE201FromTarget( 390 fmt.Sprintf("scope '%v' is no longer supported; use 'raw' instead.", scope), 391 "scope", 392 path) 393 } else if !core.StringInSlice(scope, allowedScopes) { 394 return core.NewE201FromTarget( 395 fmt.Sprintf("'%v' is not a valid scope; must be one of %v", scope, allowedScopes), 396 "scope", 397 path) 398 } 399 } 400 401 return nil 402 }