github.com/errata-ai/vale/v3@v3.4.2/internal/check/occurrence.go (about) 1 package check 2 3 import ( 4 "strconv" 5 "strings" 6 7 "github.com/errata-ai/regexp2" 8 9 "github.com/errata-ai/vale/v3/internal/core" 10 "github.com/errata-ai/vale/v3/internal/nlp" 11 ) 12 13 // Occurrence counts the number of times Token appears. 14 type Occurrence struct { 15 Definition `mapstructure:",squash"` 16 Token string 17 Max int 18 Min int 19 pattern *regexp2.Regexp 20 Ignorecase bool 21 } 22 23 // NewOccurrence creates a new `occurrence`-based rule. 24 func NewOccurrence(_ *core.Config, generic baseCheck, path string) (Occurrence, error) { 25 rule := Occurrence{} 26 27 err := decodeRule(generic, &rule) 28 if err != nil { 29 return rule, readStructureError(err, path) 30 } 31 32 err = checkScopes(rule.Scope, path) 33 if err != nil { 34 return rule, err 35 } 36 37 regex := "" 38 if rule.Ignorecase { 39 regex += ignoreCase 40 } 41 42 regex += `(?:` + rule.Token + `)` 43 re, err := regexp2.CompileStd(regex) 44 if err != nil { 45 return rule, core.NewE201FromPosition(err.Error(), path, 1) 46 } 47 48 rule.pattern = re 49 return rule, nil 50 } 51 52 // Run checks the number of occurrences of a user-defined regex against a 53 // certain threshold. 54 func (o Occurrence) Run(blk nlp.Block, _ *core.File) ([]core.Alert, error) { 55 var a core.Alert 56 var err error 57 var alerts []core.Alert 58 59 txt := blk.Text 60 locs := o.pattern.FindAllStringIndex(txt, -1) 61 62 occurrences := len(locs) 63 if (o.Max > 0 && occurrences > o.Max) || (o.Min > 0 && occurrences < o.Min) { 64 if occurrences == 0 { 65 // NOTE: We might not have a location to report -- i.e., by 66 // definition, having zero instances of a token may break a rule. 67 // 68 // In a case like this, the check essentially becomes 69 // document-scoped (like `readability`), so we mark the issue at 70 // the first line. 71 a = core.Alert{ 72 Check: o.Name, Severity: o.Level, Span: []int{1, 1}, 73 Link: o.Link} 74 } else { 75 span := []int{} 76 77 // We look for the first non-code match. 78 // 79 // Previously, we would just use the first match, but this could 80 // lead to false positives if the first match was in a code-like 81 // token. 82 // 83 // We also can't use the entire scope (`txt`) without risking 84 // having to fall back to string matching. 85 for _, loc := range locs { 86 m, rErr := re2Loc(txt, loc) 87 if rErr != nil || strings.TrimSpace(m) == "" { 88 continue 89 } else if !core.IsCode(m) { 90 span = loc 91 break 92 } 93 } 94 95 // If we can't find a non-code match, we return early. 96 // 97 // The alternative here is to use `scope: raw`. 98 if len(span) == 0 { 99 return alerts, nil 100 } 101 102 a, err = makeAlert(o.Definition, span, txt) 103 if err != nil { 104 return alerts, err 105 } 106 } 107 108 a.Message, a.Description = formatMessages(o.Message, o.Description, 109 strconv.Itoa(occurrences)) 110 alerts = append(alerts, a) 111 } 112 113 return alerts, nil 114 } 115 116 // Fields provides access to the internal rule definition. 117 func (o Occurrence) Fields() Definition { 118 return o.Definition 119 } 120 121 // Pattern is the internal regex pattern used by this rule. 122 func (o Occurrence) Pattern() string { 123 return o.pattern.String() 124 }