github.com/errata-ai/vale/v3@v3.4.2/internal/check/capitalization.go (about)

     1  package check
     2  
     3  import (
     4  	"github.com/errata-ai/regexp2"
     5  	"github.com/jdkato/twine/strcase"
     6  
     7  	"github.com/errata-ai/vale/v3/internal/core"
     8  	"github.com/errata-ai/vale/v3/internal/nlp"
     9  )
    10  
    11  // Capitalization checks the case of a string.
    12  type Capitalization struct {
    13  	Definition `mapstructure:",squash"`
    14  	// `match` (`string`): $title, $sentence, $lower, $upper, or a pattern.
    15  	Match string
    16  	Check func(s string, re *regexp2.Regexp) (string, bool)
    17  	// `style` (`string`): AP or Chicago; only applies when match is set to
    18  	// $title.
    19  	Style string
    20  	// `exceptions` (`array`): An array of strings to be ignored.
    21  	Exceptions []string
    22  	// `indicators` (`array`): An array of suffixes that indicate the next
    23  	// token should be ignored.
    24  	Indicators []string
    25  	// `threshold` (`float`): The minimum proportion of words that must be
    26  	// (un)capitalized for a sentence to be considered correct.
    27  	Threshold float64
    28  	// `vocab` (`boolean`): If `true`, use the user's `Vocab` as a list of
    29  	// exceptions.
    30  	Vocab bool
    31  	// `prefix` (`string`): A prefix to be ignored when checking for
    32  	// capitalization.
    33  	Prefix string
    34  
    35  	exceptRe *regexp2.Regexp
    36  }
    37  
    38  // NewCapitalization creates a new `capitalization`-based rule.
    39  func NewCapitalization(cfg *core.Config, generic baseCheck, path string) (Capitalization, error) {
    40  	rule := Capitalization{Vocab: true}
    41  
    42  	err := decodeRule(generic, &rule)
    43  	if err != nil {
    44  		return rule, readStructureError(err, path)
    45  	}
    46  
    47  	err = checkScopes(rule.Scope, path)
    48  	if err != nil {
    49  		return rule, err
    50  	}
    51  
    52  	re, err := updateExceptions(rule.Exceptions, cfg.AcceptedTokens, rule.Vocab)
    53  	if err != nil {
    54  		return rule, core.NewE201FromPosition(err.Error(), path, 1)
    55  	}
    56  	rule.exceptRe = re
    57  
    58  	// NOTE: This is OK since setting `Threshold` to 0 would mean that the rule
    59  	// would never trigger. In other words, we wouldn't want the default to be
    60  	// 0 because that would be equivalent to disabling the rule.
    61  	//
    62  	// Also, we chose a default of 0.8 because it matches the behavior of the
    63  	// original implementation (pre-threshold).
    64  	if rule.Threshold == 0 {
    65  		rule.Threshold = 0.8
    66  	}
    67  
    68  	if rule.Vocab {
    69  		rule.Exceptions = append(rule.Exceptions, cfg.AcceptedTokens...)
    70  	}
    71  
    72  	if rule.Match == "$title" {
    73  		var tc *strcase.TitleConverter
    74  		if rule.Style == "Chicago" {
    75  			tc = strcase.NewTitleConverter(
    76  				strcase.ChicagoStyle,
    77  				strcase.UsingVocab(rule.Exceptions),
    78  				strcase.UsingPrefix(rule.Prefix),
    79  			)
    80  		} else {
    81  			tc = strcase.NewTitleConverter(
    82  				strcase.APStyle,
    83  				strcase.UsingVocab(rule.Exceptions),
    84  				strcase.UsingPrefix(rule.Prefix),
    85  			)
    86  		}
    87  		rule.Check = func(s string, re *regexp2.Regexp) (string, bool) {
    88  			return title(s, re, tc, rule.Threshold)
    89  		}
    90  	} else if rule.Match == "$sentence" {
    91  		sc := strcase.NewSentenceConverter(
    92  			strcase.UsingVocab(rule.Exceptions),
    93  			strcase.UsingPrefix(rule.Prefix),
    94  			strcase.UsingIndicator(wasIndicator(rule.Indicators)),
    95  		)
    96  		rule.Check = func(s string, re *regexp2.Regexp) (string, bool) {
    97  			return sentence(s, re, sc, rule.Threshold)
    98  		}
    99  	} else if f, ok := varToFunc[rule.Match]; ok {
   100  		rule.Check = f
   101  	} else {
   102  		re2, errc := regexp2.CompileStd(rule.Match)
   103  		if errc != nil {
   104  			return rule, core.NewE201FromPosition(errc.Error(), path, 1)
   105  		}
   106  		rule.Check = func(s string, r *regexp2.Regexp) (string, bool) {
   107  			return re2.String(), re2.MatchStringStd(s) || isMatch(r, s)
   108  		}
   109  	}
   110  
   111  	return rule, nil
   112  }
   113  
   114  // Run checks the capitalization style of the provided text.
   115  func (c Capitalization) Run(blk nlp.Block, _ *core.File) ([]core.Alert, error) {
   116  	alerts := []core.Alert{}
   117  
   118  	expected, matched := c.Check(blk.Text, c.exceptRe)
   119  	if !matched {
   120  		action := c.Fields().Action
   121  		if action.Name == "replace" && len(action.Params) == 0 {
   122  			// We can only do this for non-regex case styles:
   123  			if c.Match == "$title" || c.Match == "$sentence" {
   124  				action.Params = []string{expected}
   125  			}
   126  		}
   127  		pos := []int{0, nlp.StrLen(blk.Text)}
   128  
   129  		a, err := makeAlert(c.Definition, pos, blk.Text)
   130  		if err != nil {
   131  			return alerts, err
   132  		}
   133  
   134  		a.Message, a.Description = formatMessages(c.Message,
   135  			c.Description, blk.Text, expected)
   136  		a.Action = action
   137  
   138  		alerts = append(alerts, a)
   139  	}
   140  
   141  	return alerts, nil
   142  }
   143  
   144  // Fields provides access to the internal rule definition.
   145  func (c Capitalization) Fields() Definition {
   146  	return c.Definition
   147  }
   148  
   149  // Pattern is the internal regex pattern used by this rule.
   150  func (c Capitalization) Pattern() string {
   151  	return ""
   152  }