github.com/errata-ai/vale/v3@v3.4.2/internal/spell/words.go (about)

     1  package spell
     2  
     3  import (
     4  	"regexp"
     5  	"strings"
     6  	"unicode"
     7  )
     8  
     // Patterns used to recognise tokens that look like numbers or hashes
     // rather than words.
     var (
     	// numberRegexp matches one or more runs of digits, each optionally
     	// followed by a single dot, comma or dash, e.g. "1,000.50" or "12-".
     	numberRegexp = regexp.MustCompile(`^([0-9]+[.,-]?)+$`)

     	// numberUnitsRegexp matches digits immediately followed by ASCII
     	// letters, i.e. a number with units such as "123ms", "12in" or "1ft".
     	numberUnitsRegexp = regexp.MustCompile(`^[0-9]+[a-zA-Z]+$`)

     	// numberHexRegexp matches hexadecimal literals written as "0x12FF",
     	// "0x1B" or "x12FF". Only a lowercase "x" prefix is accepted, so
     	// "0XFF" does not match.
     	numberHexRegexp = regexp.MustCompile(`^0?[x][0-9A-Fa-f]+$`)

     	// numberBinaryRegexp matches binary literals such as "0b1010": a
     	// leading "0b" followed by one or more 0/1 digits.
     	numberBinaryRegexp = regexp.MustCompile(`^0[b][01]+$`)

     	// shaHashRegexp matches exactly 40 characters drawn from the digits
     	// and the lowercase ASCII letters. Note the class is a-z, not just
     	// the hexadecimal a-f.
     	shaHashRegexp = regexp.MustCompile(`^[0-9a-z]{40}$`)
     )
    22  
    // splitter breaks text into words. The boundary test is wrapped in this
    // type because the implementation is expected to change.
    type splitter struct {
    	// fn reports whether c is a word boundary, i.e. not part of a word.
    	fn func(c rune) bool
    }

    // newSplitter builds a splitter. chars is a UTF-8 string listing the
    // extra runes that count as word characters in addition to letters;
    // every other rune is treated as a word boundary. The resulting
    // predicate has the signature that strings.FieldsFunc expects
    // (https://golang.org/pkg/strings/#FieldsFunc).
    func newSplitter(chars string) *splitter {
    	isBoundary := func(c rune) bool {
    		// Letters and the caller-supplied special characters stay
    		// inside a word; anything else separates words.
    		if unicode.IsLetter(c) || strings.ContainsRune(chars, c) {
    			return false
    		}
    		return true
    	}
    	return &splitter{fn: isBoundary}
    }
    42  
    43  func isNumber(s string) bool {
    44  	return numberRegexp.MatchString(s)
    45  }
    46  
    47  func isNumberBinary(s string) bool {
    48  	return numberBinaryRegexp.MatchString(s)
    49  }
    50  
    51  // is word in the form of a "number with units", e.g. "101ms", "3ft",
    52  // "5GB" if true, return the units, if not return empty string This is
    53  // highly English based and not sure how applicable it is to other
    54  // languages.
    55  func isNumberUnits(s string) string {
    56  	// regexp.FindAllStringSubmatch is too confusing
    57  	if !numberUnitsRegexp.MatchString(s) {
    58  		return ""
    59  	}
    60  	// Starts with a number
    61  	for idx, ch := range s {
    62  		if ch >= '0' && ch <= '9' {
    63  			continue
    64  		}
    65  		return s[idx:]
    66  	}
    67  	panic("assertion failed")
    68  }
    69  
    70  func isNumberHex(s string) bool {
    71  	return numberHexRegexp.MatchString(s)
    72  }
    73  
    74  func isHash(s string) bool {
    75  	return shaHashRegexp.MatchString(s)
    76  }