github.com/errata-ai/vale/v3@v3.4.2/internal/core/util.go (about)

     1  package core
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"regexp"
    10  	"strings"
    11  	"unicode"
    12  
    13  	"github.com/errata-ai/vale/v3/internal/nlp"
    14  )
    15  
    16  var defaultIgnoreDirectories = []string{
    17  	"node_modules", ".git",
    18  }
    19  var spaces = regexp.MustCompile(" +")
    20  var reANSI = regexp.MustCompile("[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))")
    21  var sanitizer = strings.NewReplacer(
    22  	"&rsquo;", "'",
    23  	"\r\n", "\n",
    24  	"\r", "\n")
    25  
    26  // CapFirst capitalizes the first letter of a string.
    27  func CapFirst(s string) string {
    28  	if len(s) == 0 {
    29  		return s
    30  	}
    31  	return strings.ToUpper(s[:1]) + s[1:]
    32  }
    33  
    34  // Sanitize prepares text for our check functions.
    35  func Sanitize(txt string) string {
    36  	// TODO: symbols?
    37  	return sanitizer.Replace(txt)
    38  }
    39  
    40  // StripANSI removes all ANSI characters from the given string.
    41  func StripANSI(s string) string {
    42  	return reANSI.ReplaceAllString(s, "")
    43  }
    44  
    45  // WhitespaceToSpace converts newlines and multiple spaces (e.g., "  ") into a
    46  // single space.
    47  func WhitespaceToSpace(msg string) string {
    48  	msg = strings.ReplaceAll(msg, "\n", " ")
    49  	msg = spaces.ReplaceAllString(msg, " ")
    50  	return msg
    51  }
    52  
    53  // ShouldIgnoreDirectory will check if directory should be ignored
    54  func ShouldIgnoreDirectory(directoryName string) bool {
    55  	for _, directory := range defaultIgnoreDirectories {
    56  		if directory == directoryName {
    57  			return true
    58  		}
    59  	}
    60  	return false
    61  }
    62  
    63  // ToSentence converts a slice of terms into sentence.
    64  func ToSentence(words []string, andOrOr string) string {
    65  	l := len(words)
    66  
    67  	if l == 1 {
    68  		return fmt.Sprintf("'%s'", words[0])
    69  	} else if l == 2 {
    70  		return fmt.Sprintf("'%s' or '%s'", words[0], words[1])
    71  	}
    72  
    73  	wordsForSentence := []string{}
    74  	for _, w := range words {
    75  		wordsForSentence = append(wordsForSentence, fmt.Sprintf("'%s'", w))
    76  	}
    77  
    78  	wordsForSentence[l-1] = andOrOr + " " + wordsForSentence[l-1]
    79  	return strings.Join(wordsForSentence, ", ")
    80  }
    81  
    82  // IsLetter returns `true` if s contains all letter characters and false if not.
    83  func IsLetter(s string) bool {
    84  	for _, r := range s {
    85  		if !unicode.IsLetter(r) {
    86  			return false
    87  		}
    88  	}
    89  	return true
    90  }
    91  
    92  // IsCode returns `true` if s is a code-like token.
    93  func IsCode(s string) bool {
    94  	for _, r := range s {
    95  		if r != '*' && r != '@' {
    96  			return false
    97  		}
    98  	}
    99  	return true
   100  }
   101  
   102  // IsPhrase returns `true` is s is a phrase-like token.
   103  //
   104  // This is used to differentiate regex tokens from non-regex.
   105  func IsPhrase(s string) bool {
   106  	for _, r := range s {
   107  		if !unicode.IsLetter(r) && r != ' ' && !unicode.IsDigit(r) && r != '-' {
   108  			return false
   109  		}
   110  	}
   111  	return true
   112  }
   113  
   114  // InRange determines if the range r contains the integer n.
   115  func InRange(n int, r []int) bool {
   116  	return len(r) == 2 && (r[0] <= n && n <= r[1])
   117  }
   118  
   119  // Which checks for the existence of any command in `cmds`.
   120  func Which(cmds []string) string {
   121  	for _, cmd := range cmds {
   122  		path, err := exec.LookPath(cmd)
   123  		if err == nil {
   124  			return path
   125  		}
   126  	}
   127  	return ""
   128  }
   129  
   130  // CondSprintf is sprintf, ignores extra arguments.
   131  func CondSprintf(format string, v ...interface{}) string {
   132  	v = append(v, "")
   133  	format += fmt.Sprint("%[", len(v), "]s")
   134  	return fmt.Sprintf(format, v...)
   135  }
   136  
   137  // FormatMessage inserts `subs` into `msg`.
   138  func FormatMessage(msg string, subs ...string) string {
   139  	return CondSprintf(msg, StringsToInterface(subs)...)
   140  }
   141  
   142  // Substitute replaces the substring `sub` with a string of asterisks.
   143  func Substitute(src, sub string, char rune) (string, bool) {
   144  	idx := strings.Index(src, sub)
   145  	if idx < 0 {
   146  		return src, false
   147  	}
   148  	repl := strings.Map(func(r rune) rune {
   149  		if r != '\n' {
   150  			return char
   151  		}
   152  		return r
   153  	}, sub)
   154  	return strings.Replace(src, sub, repl, 1), true
   155  }
   156  
   157  // StringsToInterface converts a slice of strings to an interface.
   158  func StringsToInterface(strings []string) []interface{} {
   159  	intf := make([]interface{}, len(strings))
   160  	for i, v := range strings {
   161  		intf[i] = v
   162  	}
   163  	return intf
   164  }
   165  
   166  // Indent adds padding to every line of `text`.
   167  func Indent(text, indent string) string {
   168  	if text[len(text)-1:] == "\n" {
   169  		result := ""
   170  		for _, j := range strings.Split(text[:len(text)-1], "\n") {
   171  			result += indent + j + "\n"
   172  		}
   173  		return result
   174  	}
   175  	result := ""
   176  	for _, j := range strings.Split(strings.TrimRight(text, "\n"), "\n") {
   177  		result += indent + j + "\n"
   178  	}
   179  	return result[:len(result)-1]
   180  }
   181  
   182  // IsDir determines if the path given by `filename` is a directory.
   183  func IsDir(filename string) bool {
   184  	fi, err := os.Stat(filename)
   185  	return err == nil && fi.IsDir()
   186  }
   187  
   188  // FileExists determines if the path given by `filename` exists.
   189  func FileExists(filename string) bool {
   190  	_, err := os.Stat(filename)
   191  	return err == nil
   192  }
   193  
   194  // StringInSlice determines if `slice` contains the string `a`.
   195  func StringInSlice(a string, slice []string) bool {
   196  	for _, b := range slice {
   197  		if a == b {
   198  			return true
   199  		}
   200  	}
   201  	return false
   202  }
   203  
   204  // IntInSlice determines if `slice` contains the int `a`.
   205  func IntInSlice(a int, slice []int) bool {
   206  	for _, b := range slice {
   207  		if a == b {
   208  			return true
   209  		}
   210  	}
   211  	return false
   212  }
   213  
   214  // AllStringsInSlice determines if `slice` contains the `strings`.
   215  func AllStringsInSlice(strings []string, slice []string) bool {
   216  	for _, s := range strings {
   217  		if !StringInSlice(s, slice) {
   218  			return false
   219  		}
   220  	}
   221  	return true
   222  }
   223  
   224  // SplitLines splits on CRLF, CR not followed by LF, and LF.
   225  func SplitLines(data []byte, atEOF bool) (adv int, token []byte, err error) { //nolint:nonamedreturns
   226  	if atEOF && len(data) == 0 {
   227  		return 0, nil, nil
   228  	}
   229  	if i := bytes.IndexAny(data, "\r\n"); i >= 0 {
   230  		if data[i] == '\n' {
   231  			return i + 1, data[0:i], nil
   232  		}
   233  		adv = i + 1
   234  		if len(data) > i+1 && data[i+1] == '\n' {
   235  			adv++
   236  		}
   237  		return adv, data[0:i], nil
   238  	}
   239  	if atEOF {
   240  		return len(data), data, nil
   241  	}
   242  	return 0, nil, nil
   243  }
   244  
   245  func normalizePath(path string) string {
   246  	// expand tilde
   247  	homedir, err := os.UserHomeDir()
   248  	if err != nil {
   249  		return path
   250  	}
   251  	if path == "~" {
   252  		return homedir
   253  	} else if strings.HasPrefix(path, filepath.FromSlash("~/")) {
   254  		path = filepath.Join(homedir, path[2:])
   255  	}
   256  	return path
   257  }
   258  
   259  func TextToContext(text string, meta *nlp.Info) []nlp.TaggedWord {
   260  	context := []nlp.TaggedWord{}
   261  
   262  	for idx, line := range strings.Split(text, "\n") {
   263  		plain := stripMarkdown(line)
   264  
   265  		pos := 0
   266  		for _, tok := range nlp.TextToTokens(plain, meta) {
   267  			if strings.TrimSpace(tok.Text) != "" {
   268  				s := strings.Index(line[pos:], tok.Text) + len(line[:pos])
   269  				if !StringInSlice(tok.Tag, []string{"''", "``"}) {
   270  					context = append(context, nlp.TaggedWord{
   271  						Line:  idx + 1,
   272  						Token: tok,
   273  						Span:  []int{s + 1, s + len(tok.Text)},
   274  					})
   275  				}
   276  				pos = s
   277  				line, _ = Substitute(line, tok.Text, '*')
   278  			}
   279  		}
   280  	}
   281  
   282  	return context
   283  }
   284  
   285  func ReplaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string {
   286  	result := ""
   287  	lastIndex := 0
   288  
   289  	for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) {
   290  		groups := []string{}
   291  		for i := 0; i < len(v); i += 2 {
   292  			if v[i] == -1 || v[i+1] == -1 {
   293  				groups = append(groups, "")
   294  			} else {
   295  				groups = append(groups, str[v[i]:v[i+1]])
   296  			}
   297  		}
   298  
   299  		result += str[lastIndex:v[0]] + repl(groups)
   300  		lastIndex = v[1]
   301  	}
   302  
   303  	return result + str[lastIndex:]
   304  }
   305  
   306  func HasAnySuffix(s string, suffixes []string) bool {
   307  	n := len(s)
   308  	for _, suffix := range suffixes {
   309  		if n > len(suffix) && strings.HasSuffix(s, suffix) {
   310  			return true
   311  		}
   312  	}
   313  	return false
   314  }
   315  
   316  // ReplaceExt replaces the extension of `fp` with `ext` if the extension of
   317  // `fp` is in `formats`.
   318  //
   319  // This is used in places where we need to normalize file extensions (e.g.,
   320  // `foo.mdx` -> `foo.md`) in order to respect format associations.
   321  func ReplaceExt(fp string, formats map[string]string) string {
   322  	var ext string
   323  
   324  	old := filepath.Ext(fp)
   325  	if normed, found := formats[strings.Trim(old, ".")]; found {
   326  		ext = "." + normed
   327  		fp = fp[0:len(fp)-len(old)] + ext
   328  	}
   329  
   330  	return fp
   331  }