github.com/errata-ai/vale/v3@v3.4.2/internal/check/definition.go (about)

     1  package check
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"sort"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/errata-ai/regexp2"
    11  	"github.com/mitchellh/mapstructure"
    12  	"gopkg.in/yaml.v2"
    13  
    14  	"github.com/errata-ai/vale/v3/internal/core"
    15  	"github.com/errata-ai/vale/v3/internal/nlp"
    16  )
    17  
    18  var inlineScopes = []string{"code", "link", "strong", "emphasis"}
    19  
// FilterEnv is the environment passed to the `--filter` flag.
type FilterEnv struct {
	// Rules holds the rule definitions exposed through this environment.
	Rules []Definition
}
    24  
// Rule represents an individual writing construct to enforce.
type Rule interface {
	// Run applies the rule to blk in the context of file and returns the
	// resulting alerts, or an error.
	Run(blk nlp.Block, file *core.File) ([]core.Alert, error)
	// Fields returns the rule's common Definition attributes.
	Fields() Definition
	// Pattern returns a string form of the rule's pattern.
	// NOTE(review): presumably the underlying regex source -- confirm
	// against the implementations.
	Pattern() string
}
    31  
// Definition holds the common attributes of rule definitions.
//
// makeAlert copies Name, Level, Link, and Action onto each alert (as its
// Check, Severity, Link, and Action fields) and uses Message and Description
// as the format strings for the alert's text.
//
// NOTE(review): Extends, Limit, Scope, and Selector are not read anywhere in
// this file; presumably Extends mirrors the `extends` key and Scope is what
// checkScopes validates -- confirm against the rule constructors.
type Definition struct {
	Action      core.Action
	Description string
	Extends     string
	Level       string
	Limit       int
	Link        string
	Message     string
	Name        string
	Scope       []string
	Selector    Selector
}
    45  
    46  var defaultStyles = []string{"Vale"}
    47  var extensionPoints = []string{
    48  	"capitalization",
    49  	"conditional",
    50  	"consistency",
    51  	"existence",
    52  	"occurrence",
    53  	"repetition",
    54  	"substitution",
    55  	"readability",
    56  	"spelling",
    57  	"sequence",
    58  	"metric",
    59  	"script",
    60  }
// defaultRules holds the raw definitions of the built-in rules, keyed by
// their short name. Every entry is named "Vale.<key>", reports at the
// "error" level against the "text" scope, and sets "path" to "internal".
var defaultRules = map[string]map[string]interface{}{
	// Vale.Avoid: an `existence` rule that starts with no tokens.
	"Avoid": {
		"extends":    "existence",
		"name":       "Vale.Avoid",
		"level":      "error",
		"message":    "Avoid using '%s'.",
		"scope":      "text",
		"ignorecase": false,
		"tokens":     []string{},
		"path":       "internal",
	},
	// Vale.Terms: a `substitution` rule that starts with an empty swap map.
	"Terms": {
		"extends":    "substitution",
		"name":       "Vale.Terms",
		"level":      "error",
		"message":    "Use '%s' instead of '%s'.",
		"scope":      "text",
		"ignorecase": true,
		"swap":       map[string]string{},
		"vocab":      false,
		"path":       "internal",
	},
	// Vale.Repetition: a `repetition` rule over any run of non-whitespace
	// characters, paired with an "edit" action.
	"Repetition": {
		"extends": "repetition",
		"name":    "Vale.Repetition",
		"level":   "error",
		"message": "'%s' is repeated!",
		"scope":   "text",
		"alpha":   true,
		"action": core.Action{
			Name:   "edit",
			Params: []string{"truncate", " "},
		},
		"tokens": []string{`[^\s]+`},
		"path":   "internal",
	},
	// Vale.Spelling: a `spelling` rule paired with a "suggest" action.
	"Spelling": {
		"extends": "spelling",
		"name":    "Vale.Spelling",
		"message": "Did you really mean '%s'?",
		"level":   "error",
		"scope":   "text",
		"action": core.Action{
			Name:   "suggest",
			Params: []string{"spellings"},
		},
		"ignore": []interface{}{},
		"path":   "internal",
	},
}
   111  
// Regular-expression fragments used when assembling rule patterns. The
// templates are format strings whose single `%s` receives the pattern body.
const (
	// ignoreCase is the inline flag that makes a pattern case-insensitive.
	ignoreCase      = `(?i)`
	// wordTemplate wraps the body in a non-capturing group delimited by word
	// boundaries, with multi-line mode enabled.
	wordTemplate    = `(?m)\b(?:%s)\b`
	// nonwordTemplate is wordTemplate without the word boundaries.
	nonwordTemplate = `(?m)(?:%s)`
	// tokenTemplate anchors the body at both ends so that it has to match the
	// whole input (see matchToken).
	tokenTemplate   = `^(?:%s)$` //nolint:gosec
)
   118  
// baseCheck is the generic key/value form of a rule definition; buildRule
// reads its `path` and `extends` entries to pick a concrete rule type.
type baseCheck map[string]interface{}
   120  
   121  func buildRule(cfg *core.Config, generic baseCheck) (Rule, error) {
   122  	path, ok := generic["path"].(string)
   123  	if !ok {
   124  		msg := fmt.Errorf("'%v' is not valid", generic)
   125  		return Existence{}, core.NewE100("buildRule: path", msg)
   126  	}
   127  
   128  	name, ok := generic["extends"].(string)
   129  	if !ok {
   130  		name = "unknown"
   131  	}
   132  
   133  	delete(generic, "path")
   134  	switch name {
   135  	case "existence":
   136  		return NewExistence(cfg, generic, path)
   137  	case "substitution":
   138  		return NewSubstitution(cfg, generic, path)
   139  	case "capitalization":
   140  		return NewCapitalization(cfg, generic, path)
   141  	case "occurrence":
   142  		return NewOccurrence(cfg, generic, path)
   143  	case "spelling":
   144  		return NewSpelling(cfg, generic, path)
   145  	case "repetition":
   146  		return NewRepetition(cfg, generic, path)
   147  	case "readability":
   148  		return NewReadability(cfg, generic, path)
   149  	case "conditional":
   150  		return NewConditional(cfg, generic, path)
   151  	case "consistency":
   152  		return NewConsistency(cfg, generic, path)
   153  	case "sequence":
   154  		return NewSequence(cfg, generic, path)
   155  	case "metric":
   156  		return NewMetric(cfg, generic, path)
   157  	case "script":
   158  		return NewScript(cfg, generic, path)
   159  	default:
   160  		return Existence{}, core.NewE201FromTarget(
   161  			fmt.Sprintf("'extends' key must be one of %v.", extensionPoints),
   162  			name,
   163  			path)
   164  	}
   165  }
   166  
   167  func formatMessages(msg string, desc string, subs ...string) (string, string) {
   168  	return core.FormatMessage(msg, subs...), core.FormatMessage(desc, subs...)
   169  }
   170  
   171  // NOTE: We need to do this because regexp2, the library we use for extended
   172  // syntax, returns its locatons in *rune* offsets.
   173  func re2Loc(s string, loc []int) (string, error) {
   174  	converted := []rune(s)
   175  
   176  	size := len(converted)
   177  	if loc[0] < 0 || loc[1] > size {
   178  		msg := fmt.Errorf("%d (%d:%d)", size, loc[0], loc[1])
   179  		return "", core.NewE100("re2loc: bounds", msg)
   180  	}
   181  
   182  	return string(converted[loc[0]:loc[1]]), nil
   183  }
   184  
   185  func makeAlert(chk Definition, loc []int, txt string) (core.Alert, error) {
   186  	match, err := re2Loc(txt, loc)
   187  	if err != nil {
   188  		return core.Alert{}, err
   189  	}
   190  
   191  	a := core.Alert{
   192  		Check: chk.Name, Severity: chk.Level, Span: loc, Link: chk.Link,
   193  		Match: match, Action: chk.Action}
   194  	a.Message, a.Description = formatMessages(chk.Message, chk.Description, match)
   195  
   196  	return a, nil
   197  }
   198  
   199  func parse(file []byte, path string) (map[string]interface{}, error) {
   200  	generic := map[string]interface{}{}
   201  
   202  	if err := yaml.Unmarshal(file, &generic); err != nil {
   203  		r := regexp.MustCompile(`yaml: line (\d+): (.+)`)
   204  		if r.MatchString(err.Error()) {
   205  			groups := r.FindStringSubmatch(err.Error())
   206  			i, erri := strconv.Atoi(groups[1])
   207  			if erri != nil {
   208  				return generic, core.NewE100("addCheck/Atoi", erri)
   209  			}
   210  			return generic, core.NewE201FromPosition(groups[2], path, i)
   211  		}
   212  	} else if err = validateDefinition(generic, path); err != nil {
   213  		return generic, err
   214  	}
   215  
   216  	return generic, nil
   217  }
   218  
   219  func validateDefinition(generic map[string]interface{}, path string) error {
   220  	if point, ok := generic["extends"]; !ok || point == nil {
   221  		return core.NewE201FromPosition(
   222  			"Missing the required 'extends' key.",
   223  			path,
   224  			1)
   225  	} else if !core.StringInSlice(point.(string), extensionPoints) {
   226  		key, _ := point.(string)
   227  		return core.NewE201FromTarget(
   228  			fmt.Sprintf("'extends' key must be one of %v.", extensionPoints),
   229  			key,
   230  			path)
   231  	}
   232  
   233  	if _, ok := generic["message"]; !ok {
   234  		return core.NewE201FromPosition(
   235  			"Missing the required 'message' key.",
   236  			path,
   237  			1)
   238  	}
   239  
   240  	if level, ok := generic["level"]; ok {
   241  		if level == nil || !core.StringInSlice(level.(string), core.AlertLevels) {
   242  			return core.NewE201FromTarget(
   243  				fmt.Sprintf("'level' must be one of %v", core.AlertLevels),
   244  				"level",
   245  				path)
   246  		}
   247  	}
   248  
   249  	if generic["code"] != nil && generic["code"].(bool) {
   250  		return core.NewE201FromTarget(
   251  			"`code` is deprecated; please use `scope: raw` instead.",
   252  			"code",
   253  			path)
   254  	}
   255  
   256  	return nil
   257  }
   258  
   259  func readStructureError(err error, path string) error {
   260  	r1 := regexp.MustCompile(`\* '(.+)' (.+)`)
   261  	r2 := regexp.MustCompile(`\* '(?:.*)' (.*): (\w+)`)
   262  	if r1.MatchString(err.Error()) {
   263  		groups := r1.FindStringSubmatch(err.Error())
   264  		return core.NewE201FromTarget(
   265  			groups[2],
   266  			strings.ToLower(groups[1]),
   267  			path)
   268  	} else if r2.MatchString(err.Error()) {
   269  		groups := r2.FindStringSubmatch(err.Error())
   270  		return core.NewE201FromTarget(
   271  			fmt.Sprintf("%s: '%s'", groups[1], groups[2]),
   272  			strings.ToLower(groups[2]),
   273  			path)
   274  	}
   275  	return core.NewE201FromPosition(err.Error(), path, 1)
   276  }
   277  
   278  func makeRegexp(
   279  	template string,
   280  	noCase bool,
   281  	word func() bool,
   282  	callback func() string,
   283  	shouldAppend bool,
   284  ) string {
   285  	regex := ""
   286  
   287  	if word() {
   288  		if template != "" {
   289  			regex += template
   290  		} else {
   291  			regex += wordTemplate
   292  		}
   293  	} else {
   294  		regex += nonwordTemplate
   295  	}
   296  
   297  	if shouldAppend {
   298  		regex += callback()
   299  	} else {
   300  		regex = callback() + regex
   301  	}
   302  
   303  	if noCase {
   304  		regex = ignoreCase + regex
   305  	}
   306  
   307  	return regex
   308  }
   309  
   310  func matchToken(expected, observed string, ignorecase bool) bool {
   311  	p := expected
   312  	if ignorecase {
   313  		p = ignoreCase + p
   314  	}
   315  
   316  	r, err := regexp2.CompileStd(fmt.Sprintf(tokenTemplate, p))
   317  	if core.IsPhrase(expected) || err != nil {
   318  		return expected == observed
   319  	}
   320  	return r.MatchStringStd(observed)
   321  }
   322  
   323  func updateExceptions(previous []string, current []string, vocab bool) (*regexp2.Regexp, error) {
   324  	if vocab {
   325  		previous = append(previous, current...)
   326  	}
   327  
   328  	// NOTE: This is required to ensure that we have greedy alternation.
   329  	sort.Slice(previous, func(p, q int) bool {
   330  		return len(previous[p]) > len(previous[q])
   331  	})
   332  
   333  	// NOTE: We need to add `(?-i)` to each term that doesn't already have it,
   334  	// otherwise any instance of the `(?i)` flag will be set for the entire
   335  	// expression.
   336  	for i, term := range previous {
   337  		if !strings.HasPrefix(term, "(?i)") {
   338  			previous[i] = fmt.Sprintf("(?-i)%s", term)
   339  		}
   340  	}
   341  
   342  	regex := makeRegexp(
   343  		"",
   344  		false,
   345  		func() bool { return true },
   346  		func() string { return "" },
   347  		true)
   348  
   349  	regex = fmt.Sprintf(regex, strings.Join(previous, "|"))
   350  	if len(previous) > 0 {
   351  		return regexp2.CompileStd(regex)
   352  	}
   353  
   354  	return &regexp2.Regexp{}, nil
   355  }
   356  
   357  func decodeRule(input interface{}, output interface{}) error {
   358  	config := mapstructure.DecoderConfig{
   359  		ErrorUnused:      true,
   360  		Squash:           true,
   361  		WeaklyTypedInput: true,
   362  		Result:           output,
   363  	}
   364  
   365  	decoder, err := mapstructure.NewDecoder(&config)
   366  	if err != nil {
   367  		return err
   368  	}
   369  
   370  	return decoder.Decode(input)
   371  }
   372  
   373  func checkScopes(scopes []string, path string) error {
   374  	for _, scope := range scopes {
   375  		if strings.Contains(scope, "&") {
   376  			// FIXME: multi part ...
   377  			continue
   378  		}
   379  
   380  		// Negation ...
   381  		scope = strings.TrimPrefix(scope, "~")
   382  
   383  		// Specification ...
   384  		//
   385  		// TODO: check sub-scopes too?
   386  		scope = strings.Split(scope, ".")[0]
   387  
   388  		if core.StringInSlice(scope, inlineScopes) {
   389  			return core.NewE201FromTarget(
   390  				fmt.Sprintf("scope '%v' is no longer supported; use 'raw' instead.", scope),
   391  				"scope",
   392  				path)
   393  		} else if !core.StringInSlice(scope, allowedScopes) {
   394  			return core.NewE201FromTarget(
   395  				fmt.Sprintf("'%v' is not a valid scope; must be one of %v", scope, allowedScopes),
   396  				"scope",
   397  				path)
   398  		}
   399  	}
   400  
   401  	return nil
   402  }