github.com/errata-ai/vale/v3@v3.4.2/internal/core/util.go (about) 1 package core 2 3 import ( 4 "bytes" 5 "fmt" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "regexp" 10 "strings" 11 "unicode" 12 13 "github.com/errata-ai/vale/v3/internal/nlp" 14 ) 15 16 var defaultIgnoreDirectories = []string{ 17 "node_modules", ".git", 18 } 19 var spaces = regexp.MustCompile(" +") 20 var reANSI = regexp.MustCompile("[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))") 21 var sanitizer = strings.NewReplacer( 22 "’", "'", 23 "\r\n", "\n", 24 "\r", "\n") 25 26 // CapFirst capitalizes the first letter of a string. 27 func CapFirst(s string) string { 28 if len(s) == 0 { 29 return s 30 } 31 return strings.ToUpper(s[:1]) + s[1:] 32 } 33 34 // Sanitize prepares text for our check functions. 35 func Sanitize(txt string) string { 36 // TODO: symbols? 37 return sanitizer.Replace(txt) 38 } 39 40 // StripANSI removes all ANSI characters from the given string. 41 func StripANSI(s string) string { 42 return reANSI.ReplaceAllString(s, "") 43 } 44 45 // WhitespaceToSpace converts newlines and multiple spaces (e.g., " ") into a 46 // single space. 47 func WhitespaceToSpace(msg string) string { 48 msg = strings.ReplaceAll(msg, "\n", " ") 49 msg = spaces.ReplaceAllString(msg, " ") 50 return msg 51 } 52 53 // ShouldIgnoreDirectory will check if directory should be ignored 54 func ShouldIgnoreDirectory(directoryName string) bool { 55 for _, directory := range defaultIgnoreDirectories { 56 if directory == directoryName { 57 return true 58 } 59 } 60 return false 61 } 62 63 // ToSentence converts a slice of terms into sentence. 
//
// Every word is wrapped in single quotes. One word yields "'a'", two words
// yield "'a' <andOrOr> 'b'", and three or more yield a comma-separated list
// whose last entry is prefixed with `andOrOr` (e.g. "'a', 'b', or 'c'").
// An empty slice yields the empty string.
func ToSentence(words []string, andOrOr string) string {
	switch len(words) {
	case 0:
		// Nothing to list; without this guard the last-element access
		// below would index out of range.
		return ""
	case 1:
		return "'" + words[0] + "'"
	case 2:
		// Honor the caller's conjunction rather than assuming "or".
		return fmt.Sprintf("'%s' %s '%s'", words[0], andOrOr, words[1])
	}

	quoted := make([]string, len(words))
	for i, w := range words {
		quoted[i] = "'" + w + "'"
	}

	last := len(quoted) - 1
	quoted[last] = andOrOr + " " + quoted[last]
	return strings.Join(quoted, ", ")
}

// IsLetter returns `true` if every rune in s is a letter and false if not.
//
// The empty string contains no non-letter rune, so it returns `true`.
func IsLetter(s string) bool {
	for _, r := range s {
		if !unicode.IsLetter(r) {
			return false
		}
	}
	return true
}

// IsCode returns `true` if s is a code-like token, which here means that
// every rune in s is either '*' or '@'.
//
// The empty string returns `true`.
func IsCode(s string) bool {
	for _, r := range s {
		if r != '*' && r != '@' {
			return false
		}
	}
	return true
}

// IsPhrase returns `true` if s is a phrase-like token: one made up only of
// letters, digits, spaces, and hyphens.
//
// This is used to differentiate regex tokens from non-regex.
func IsPhrase(s string) bool {
	for _, r := range s {
		if !unicode.IsLetter(r) && r != ' ' && !unicode.IsDigit(r) && r != '-' {
			return false
		}
	}
	return true
}

// InRange determines if the range r contains the integer n.
//
// r must hold exactly two values, the inclusive lower and upper bounds; any
// other length returns false.
func InRange(n int, r []int) bool {
	return len(r) == 2 && (r[0] <= n && n <= r[1])
}

// Which checks for the existence of any command in `cmds`.
//
// It returns the path reported by exec.LookPath for the first command that
// resolves, or the empty string if none of them do.
func Which(cmds []string) string {
	for _, cmd := range cmds {
		path, err := exec.LookPath(cmd)
		if err == nil {
			return path
		}
	}
	return ""
}

// CondSprintf is sprintf, ignores extra arguments.
//
// An empty string is added as a final argument and an explicitly indexed
// "%[n]s" verb pointing at it is appended to the format. The verb prints
// nothing, and because the format now uses explicit argument indexes, fmt
// does not append its "%!(EXTRA ...)" marker for unused arguments.
func CondSprintf(format string, v ...interface{}) string {
	// Copy before appending so the caller's slice is never written to.
	args := make([]interface{}, 0, len(v)+1)
	args = append(args, v...)
	args = append(args, "")

	padded := fmt.Sprintf("%s%%[%d]s", format, len(args))
	return fmt.Sprintf(padded, args...)
}

// FormatMessage inserts `subs` into `msg`.
138 func FormatMessage(msg string, subs ...string) string { 139 return CondSprintf(msg, StringsToInterface(subs)...) 140 } 141 142 // Substitute replaces the substring `sub` with a string of asterisks. 143 func Substitute(src, sub string, char rune) (string, bool) { 144 idx := strings.Index(src, sub) 145 if idx < 0 { 146 return src, false 147 } 148 repl := strings.Map(func(r rune) rune { 149 if r != '\n' { 150 return char 151 } 152 return r 153 }, sub) 154 return strings.Replace(src, sub, repl, 1), true 155 } 156 157 // StringsToInterface converts a slice of strings to an interface. 158 func StringsToInterface(strings []string) []interface{} { 159 intf := make([]interface{}, len(strings)) 160 for i, v := range strings { 161 intf[i] = v 162 } 163 return intf 164 } 165 166 // Indent adds padding to every line of `text`. 167 func Indent(text, indent string) string { 168 if text[len(text)-1:] == "\n" { 169 result := "" 170 for _, j := range strings.Split(text[:len(text)-1], "\n") { 171 result += indent + j + "\n" 172 } 173 return result 174 } 175 result := "" 176 for _, j := range strings.Split(strings.TrimRight(text, "\n"), "\n") { 177 result += indent + j + "\n" 178 } 179 return result[:len(result)-1] 180 } 181 182 // IsDir determines if the path given by `filename` is a directory. 183 func IsDir(filename string) bool { 184 fi, err := os.Stat(filename) 185 return err == nil && fi.IsDir() 186 } 187 188 // FileExists determines if the path given by `filename` exists. 189 func FileExists(filename string) bool { 190 _, err := os.Stat(filename) 191 return err == nil 192 } 193 194 // StringInSlice determines if `slice` contains the string `a`. 195 func StringInSlice(a string, slice []string) bool { 196 for _, b := range slice { 197 if a == b { 198 return true 199 } 200 } 201 return false 202 } 203 204 // IntInSlice determines if `slice` contains the int `a`. 
205 func IntInSlice(a int, slice []int) bool { 206 for _, b := range slice { 207 if a == b { 208 return true 209 } 210 } 211 return false 212 } 213 214 // AllStringsInSlice determines if `slice` contains the `strings`. 215 func AllStringsInSlice(strings []string, slice []string) bool { 216 for _, s := range strings { 217 if !StringInSlice(s, slice) { 218 return false 219 } 220 } 221 return true 222 } 223 224 // SplitLines splits on CRLF, CR not followed by LF, and LF. 225 func SplitLines(data []byte, atEOF bool) (adv int, token []byte, err error) { //nolint:nonamedreturns 226 if atEOF && len(data) == 0 { 227 return 0, nil, nil 228 } 229 if i := bytes.IndexAny(data, "\r\n"); i >= 0 { 230 if data[i] == '\n' { 231 return i + 1, data[0:i], nil 232 } 233 adv = i + 1 234 if len(data) > i+1 && data[i+1] == '\n' { 235 adv++ 236 } 237 return adv, data[0:i], nil 238 } 239 if atEOF { 240 return len(data), data, nil 241 } 242 return 0, nil, nil 243 } 244 245 func normalizePath(path string) string { 246 // expand tilde 247 homedir, err := os.UserHomeDir() 248 if err != nil { 249 return path 250 } 251 if path == "~" { 252 return homedir 253 } else if strings.HasPrefix(path, filepath.FromSlash("~/")) { 254 path = filepath.Join(homedir, path[2:]) 255 } 256 return path 257 } 258 259 func TextToContext(text string, meta *nlp.Info) []nlp.TaggedWord { 260 context := []nlp.TaggedWord{} 261 262 for idx, line := range strings.Split(text, "\n") { 263 plain := stripMarkdown(line) 264 265 pos := 0 266 for _, tok := range nlp.TextToTokens(plain, meta) { 267 if strings.TrimSpace(tok.Text) != "" { 268 s := strings.Index(line[pos:], tok.Text) + len(line[:pos]) 269 if !StringInSlice(tok.Tag, []string{"''", "``"}) { 270 context = append(context, nlp.TaggedWord{ 271 Line: idx + 1, 272 Token: tok, 273 Span: []int{s + 1, s + len(tok.Text)}, 274 }) 275 } 276 pos = s 277 line, _ = Substitute(line, tok.Text, '*') 278 } 279 } 280 } 281 282 return context 283 } 284 285 func 
ReplaceAllStringSubmatchFunc(re *regexp.Regexp, str string, repl func([]string) string) string { 286 result := "" 287 lastIndex := 0 288 289 for _, v := range re.FindAllSubmatchIndex([]byte(str), -1) { 290 groups := []string{} 291 for i := 0; i < len(v); i += 2 { 292 if v[i] == -1 || v[i+1] == -1 { 293 groups = append(groups, "") 294 } else { 295 groups = append(groups, str[v[i]:v[i+1]]) 296 } 297 } 298 299 result += str[lastIndex:v[0]] + repl(groups) 300 lastIndex = v[1] 301 } 302 303 return result + str[lastIndex:] 304 } 305 306 func HasAnySuffix(s string, suffixes []string) bool { 307 n := len(s) 308 for _, suffix := range suffixes { 309 if n > len(suffix) && strings.HasSuffix(s, suffix) { 310 return true 311 } 312 } 313 return false 314 } 315 316 // ReplaceExt replaces the extension of `fp` with `ext` if the extension of 317 // `fp` is in `formats`. 318 // 319 // This is used in places where we need to normalize file extensions (e.g., 320 // `foo.mdx` -> `foo.md`) in order to respect format associations. 321 func ReplaceExt(fp string, formats map[string]string) string { 322 var ext string 323 324 old := filepath.Ext(fp) 325 if normed, found := formats[strings.Trim(old, ".")]; found { 326 ext = "." + normed 327 fp = fp[0:len(fp)-len(old)] + ext 328 } 329 330 return fp 331 }