github.com/errata-ai/vale/v3@v3.4.2/internal/check/occurrence.go (about)

     1  package check
     2  
     3  import (
     4  	"strconv"
     5  	"strings"
     6  
     7  	"github.com/errata-ai/regexp2"
     8  
     9  	"github.com/errata-ai/vale/v3/internal/core"
    10  	"github.com/errata-ai/vale/v3/internal/nlp"
    11  )
    12  
    13  // Occurrence counts the number of times Token appears.
    14  type Occurrence struct {
    15  	Definition `mapstructure:",squash"`
    16  	Token      string
    17  	Max        int
    18  	Min        int
    19  	pattern    *regexp2.Regexp
    20  	Ignorecase bool
    21  }
    22  
    23  // NewOccurrence creates a new `occurrence`-based rule.
    24  func NewOccurrence(_ *core.Config, generic baseCheck, path string) (Occurrence, error) {
    25  	rule := Occurrence{}
    26  
    27  	err := decodeRule(generic, &rule)
    28  	if err != nil {
    29  		return rule, readStructureError(err, path)
    30  	}
    31  
    32  	err = checkScopes(rule.Scope, path)
    33  	if err != nil {
    34  		return rule, err
    35  	}
    36  
    37  	regex := ""
    38  	if rule.Ignorecase {
    39  		regex += ignoreCase
    40  	}
    41  
    42  	regex += `(?:` + rule.Token + `)`
    43  	re, err := regexp2.CompileStd(regex)
    44  	if err != nil {
    45  		return rule, core.NewE201FromPosition(err.Error(), path, 1)
    46  	}
    47  
    48  	rule.pattern = re
    49  	return rule, nil
    50  }
    51  
    52  // Run checks the number of occurrences of a user-defined regex against a
    53  // certain threshold.
    54  func (o Occurrence) Run(blk nlp.Block, _ *core.File) ([]core.Alert, error) {
    55  	var a core.Alert
    56  	var err error
    57  	var alerts []core.Alert
    58  
    59  	txt := blk.Text
    60  	locs := o.pattern.FindAllStringIndex(txt, -1)
    61  
    62  	occurrences := len(locs)
    63  	if (o.Max > 0 && occurrences > o.Max) || (o.Min > 0 && occurrences < o.Min) {
    64  		if occurrences == 0 {
    65  			// NOTE: We might not have a location to report -- i.e., by
    66  			// definition, having zero instances of a token may break a rule.
    67  			//
    68  			// In a case like this, the check essentially becomes
    69  			// document-scoped (like `readability`), so we mark the issue at
    70  			// the first line.
    71  			a = core.Alert{
    72  				Check: o.Name, Severity: o.Level, Span: []int{1, 1},
    73  				Link: o.Link}
    74  		} else {
    75  			span := []int{}
    76  
    77  			// We look for the first non-code match.
    78  			//
    79  			// Previously, we would just use the first match, but this could
    80  			// lead to false positives if the first match was in a code-like
    81  			// token.
    82  			//
    83  			// We also can't use the entire scope (`txt`) without risking
    84  			// having to fall back to string matching.
    85  			for _, loc := range locs {
    86  				m, rErr := re2Loc(txt, loc)
    87  				if rErr != nil || strings.TrimSpace(m) == "" {
    88  					continue
    89  				} else if !core.IsCode(m) {
    90  					span = loc
    91  					break
    92  				}
    93  			}
    94  
    95  			// If we can't find a non-code match, we return early.
    96  			//
    97  			// The alternative here is to use `scope: raw`.
    98  			if len(span) == 0 {
    99  				return alerts, nil
   100  			}
   101  
   102  			a, err = makeAlert(o.Definition, span, txt)
   103  			if err != nil {
   104  				return alerts, err
   105  			}
   106  		}
   107  
   108  		a.Message, a.Description = formatMessages(o.Message, o.Description,
   109  			strconv.Itoa(occurrences))
   110  		alerts = append(alerts, a)
   111  	}
   112  
   113  	return alerts, nil
   114  }
   115  
   116  // Fields provides access to the internal rule definition.
   117  func (o Occurrence) Fields() Definition {
   118  	return o.Definition
   119  }
   120  
   121  // Pattern is the internal regex pattern used by this rule.
   122  func (o Occurrence) Pattern() string {
   123  	return o.pattern.String()
   124  }